In [1]:
import random
import pandas as pd
import numpy as np
df = pd.read_excel("/content/drive/MyDrive/behaviour_simulation_train.xlsx")


df.shape
df.head()
df.info()
df.isnull().sum()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300000 entries, 0 to 299999
Data columns (total 7 columns):
 #   Column            Non-Null Count   Dtype 
---  ------            --------------   ----- 
 0   id                300000 non-null  int64 
 1   date              300000 non-null  object
 2   likes             300000 non-null  int64 
 3   content           300000 non-null  object
 4   username          300000 non-null  object
 5   media             300000 non-null  object
 6   inferred company  300000 non-null  object
dtypes: int64(2), object(5)
memory usage: 16.0+ MB


Unnamed: 0,0
id,0
date,0
likes,0
content,0
username,0
media,0
inferred company,0


In [2]:
df.dropna(subset=['content', 'username', 'inferred company', 'likes'], inplace=True)
df['media'] = df['media'].fillna('no_media')


df['has_media'] = df['media'].apply(lambda x: x != 'no_media')
df['content'] = df['content'].astype(str).str.strip().str.lower()
df['datetime'] = pd.to_datetime(df['date'], errors='coerce')

In [3]:
df['hour'] = df['datetime'].dt.hour
df['day_of_week'] = df['datetime'].dt.day_name()
df['word_count'] = df['content'].apply(lambda x: len(x.split()))
df['char_count'] = df['content'].apply(len)

In [4]:
from textblob import TextBlob

df['sentiment'] = df['content'].apply(lambda x: TextBlob(x).sentiment.polarity)

In [5]:
df['char_count'] = df['content'].apply(len)

In [6]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
df['company_encoded'] = le.fit_transform(df['inferred company'])

In [7]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
df['day_of_week_encoded'] = le.fit_transform(df['day_of_week'])

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import joblib
from math import sqrt
import numpy as np


y = np.log1p(df['likes'])


X = df[['word_count', 'char_count',  'hour', 'sentiment','company_encoded','day_of_week_encoded']]


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestRegressor()
model.fit(X_train, y_train)


log_preds = model.predict(X_test)


preds = np.expm1(log_preds)



y_test_original = np.expm1(y_test)


from sklearn.metrics import mean_squared_error
rmse = mean_squared_error(y_test_original, preds)
print("RMSE (original scale):", sqrt(rmse))



y_mean_log = y_train.mean()
baseline_log_preds = [y_mean_log] * len(y_test)
baseline_preds = np.expm1(baseline_log_preds)
baseline_rmse = mean_squared_error(y_test_original, baseline_preds)
print("Baseline RMSE (Mean Prediction, original scale):", sqrt(baseline_rmse))
import joblib
joblib.dump(model, 'like_predictor.pkl')

RMSE (original scale): 4480.344761857787
Baseline RMSE (Mean Prediction, original scale): 4881.240219298552


['like_predictor.pkl']

In [9]:
class SimpleTweetGenerator:
  def __init__(self) -> None:
    self.template={
        "Announcement":[
                "🚀 Exciting news from {company}! {message}",
                "Big announcement: {company} is {message} 🎉",
                "Hey everyone! {company} has {message} ✨"
        ],
        "question":[
                "What do you think about {topic}? Let us know! 💬",
                "Quick question: How do you feel about {topic}? 🤔",
                "{company} wants to know: What's your take on {topic}? 🗣️"
        ],
        "general":[
                "Check out what {company} is up to! {message} 🌟",
                "{company} update: {message} 💯",
                "From the {company} team: {message} 🔥"

        ]

    }
  def generate_tweet(self, company, tweet_type="general", message="Something awesome!", topic="innovation")-> str:
        template_list = self.template.get(tweet_type, self.template['general'])
        template = random.choice(template_list)


        tweet = template.format(
            company=company,
            message=message,
            topic=topic
        )


        if len(tweet) > 280:
            tweet = tweet[:277] + "..."

        return tweet


generator = SimpleTweetGenerator()
test_tweet = generator.generate_tweet("Nike", "announcement", "launching new running shoes")
print(test_tweet)





From the Nike team: launching new running shoes 🔥


In [10]:

generator = SimpleTweetGenerator()

print("Test 1:", generator.generate_tweet("Starbucks", "question", topic="coffee"))
print("Test 2:", generator.generate_tweet("Apple", "announcement", "releasing iOS update"))
print("Test 3:", generator.generate_tweet("Tesla", "general", "changing the world"))

Test 1: Starbucks wants to know: What's your take on coffee? 🗣️
Test 2: Check out what Apple is up to! releasing iOS update 🌟
Test 3: Check out what Tesla is up to! changing the world 🌟


In [11]:

from flask import Flask, request, jsonify
import threading

app = Flask(__name__)
generator = SimpleTweetGenerator()

@app.route('/generate', methods=['POST'])
def generate():
    try:
        data = request.get_json()
        company = data.get('company', 'Our Company')
        tweet_type = data.get('tweet_type', 'general')
        message = data.get('message', 'Something awesome!')
        topic = data.get('topic', 'innovation')
        generated_tweet = generator.generate_tweet(company, tweet_type, message, topic)
        return jsonify({
            'generated_tweet': generated_tweet,
            'success': True,
            'company': company,
            'type': tweet_type
        })
    except Exception as e:
        return jsonify({
            'error': str(e),
            'success': False
        }), 500

@app.route('/health', methods=['GET'])
def health():
    return jsonify({'status': 'Tweet Generator API is running!'})
def run_flask():
  app.run(host='0.0.0.0',port=5001)

flask_thread = threading.Thread(target=run_flask)
flask_thread.daemon = True
flask_thread.start()


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5001
 * Running on http://172.28.0.12:5001


In [12]:
from flask import Flask, request, jsonify
import joblib
import numpy as np

import threading


model = joblib.load('like_predictor.pkl')




app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    data = request.get_json()
    features = np.array([
        data['word_count'],
        data['char_count'],
        data['hour'],
        data['sentiment'],
        data['company_encoded'],
        data['day_of_week_encoded']
    ]).reshape(1, -1)
    prediction = model.predict(features)[0]
    return jsonify({'predicted_likes': int(prediction)})

def run_flask():
    app.run(host='0.0.0.0', port=5000)


flask_thread = threading.Thread(target=run_flask)
flask_thread.daemon = True
flask_thread.start()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000


In [13]:
import requests

# Test your new generator API
response = requests.post('http://localhost:5001/generate', json={
    'company': 'Starbucks',
    'tweet_type': 'question',
    'message': 'trying new recipes',
    'topic': 'coffee'
})

print("Generated Tweet:", response.json())


INFO:werkzeug:127.0.0.1 - - [04/Jul/2025 18:37:11] "POST /generate HTTP/1.1" 200 -


Generated Tweet: {'company': 'Starbucks', 'generated_tweet': "Starbucks wants to know: What's your take on coffee? 🗣️", 'success': True, 'type': 'question'}


In [14]:
!curl -X POST http://localhost:5001/generate -H "Content-Type: application/json" -d '{"company": "Nike", "tweet_type": "announcement", "message": "launching new Air Max shoes", "topic": "running"}'


INFO:werkzeug:127.0.0.1 - - [04/Jul/2025 18:37:17] "POST /generate HTTP/1.1" 200 -


{"company":"Nike","generated_tweet":"Check out what Nike is up to! launching new Air Max shoes \ud83c\udf1f","success":true,"type":"announcement"}


In [15]:
import requests

# Test prediction API (from Week 2)
prediction_response = requests.post('http://localhost:5000/predict', json={
    'word_count': 15,
    'char_count': 120,
    'company_encoded':1,
    'day_of_week_encoded': 3,
    'hour': 14,
    'sentiment': 0.8
})

# Test generation API (from Week 3)
generation_response = requests.post('http://localhost:5001/generate', json={
    'company': 'Nike',
    'tweet_type': 'announcement',
    'message': 'launching new product',
    'topic': 'sports'
})

print("Predicted Likes:", prediction_response.json())
print("Generated Tweet:", generation_response.json())

INFO:werkzeug:127.0.0.1 - - [04/Jul/2025 18:37:19] "POST /predict HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Jul/2025 18:37:19] "POST /generate HTTP/1.1" 200 -


Predicted Likes: {'predicted_likes': 0}
Generated Tweet: {'company': 'Nike', 'generated_tweet': 'Check out what Nike is up to! launching new product 🌟', 'success': True, 'type': 'announcement'}
