# Newsfeed Recommendation Algorithm

In [2]:
# Third-party library imports
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


In [1]:

# Fetching the dataset from Kaggle
def fetch_data(url):
    """Load a CSV dataset and return it as a pandas DataFrame.

    Args:
        url: Location of the CSV; forwarded unchanged to ``pandas.read_csv``.

    Returns:
        The parsed DataFrame.
    """
    dataset = pd.read_csv(url)
    return dataset

# Preprocessing the data
def preprocess_data(df):
    """Derive timestamp, length, calendar and engagement columns on ``df``.

    The frame is modified in place and the same object is returned.

    Columns read: ``date_posted``, ``content``, ``num_comments``,
    ``num_shares``.
    Columns written: ``timestamp``, ``content_length``, ``day_of_week``,
    ``hour_of_day``, ``engagement_score``.

    Args:
        df: DataFrame holding the raw post columns listed above.

    Returns:
        ``df`` with the derived columns added.
    """
    def _content_length(value):
        # Missing content (None/NaN) has no len(); count it as empty text
        # instead of letting a single blank row abort preprocessing.
        try:
            return len(value)
        except TypeError:
            return 0

    df['timestamp'] = pd.to_datetime(df['date_posted'])
    df['content_length'] = df['content'].apply(_content_length)
    df['day_of_week'] = df['timestamp'].dt.dayofweek
    df['hour_of_day'] = df['timestamp'].dt.hour
    # Row-wise total of comments and shares.
    df['engagement_score'] = df[['num_comments', 'num_shares']].sum(axis=1)
    return df

def normalize_features(df, features):
    """Min-max scale the selected columns of ``df``.

    A new ``MinMaxScaler`` is fitted on ``df[features]`` on every call, so
    the scaling depends only on the rows passed in.

    Args:
        df: Source DataFrame.
        features: List of column names to scale.

    Returns:
        A new DataFrame containing only ``features``, scaled, and carrying
        the same index as ``df``.
    """
    scaler = MinMaxScaler()
    scaled_values = scaler.fit_transform(df[features])
    # Keep the caller's index: without it the result gets a fresh RangeIndex
    # and no longer aligns with other columns of df (e.g. the target) when df
    # has been filtered or re-indexed.
    return pd.DataFrame(scaled_values, columns=features, index=df.index)

# Model class for recommendation
class NewsFeedRecommendationModel:
    """Small feed-forward binary classifier built on ``tf.keras``.

    Architecture: Dense(64, relu) -> BatchNormalization -> Dropout(0.5) ->
    Dense(32, relu) -> BatchNormalization -> Dropout(0.3) -> Dense(1, sigmoid).
    Compiled with Adam, binary cross-entropy loss and an accuracy metric.
    The compiled Keras model is exposed as ``self.model``.
    """

    def __init__(self, input_dim, learning_rate=0.001):
        """Build and compile the network.

        Args:
            input_dim: Number of input features per sample.
            learning_rate: Learning rate passed to the Adam optimizer.
        """
        layers = tf.keras.layers
        self.model = tf.keras.Sequential([
            # Declare the input shape explicitly; passing ``input_dim`` to the
            # first Dense layer is the legacy form and warns on Keras 3.
            tf.keras.Input(shape=(input_dim,)),
            layers.Dense(64, activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.5),
            layers.Dense(32, activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.3),
            layers.Dense(1, activation='sigmoid'),
        ])
        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )

    def train(self, X_train, y_train, epochs=50, batch_size=32,
              validation_split=0.2, verbose=1):
        """Fit the model and return the Keras ``History`` object.

        Args:
            X_train: Training features.
            y_train: Training labels.
            epochs: Number of passes over the training data.
            batch_size: Samples per gradient update.
            validation_split: Fraction of the training data Keras holds out
                for validation (default keeps the previous fixed 0.2).
            verbose: Keras progress verbosity (default keeps the previous 1).

        Returns:
            The ``History`` returned by ``model.fit``.
        """
        history = self.model.fit(
            X_train,
            y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split,
            verbose=verbose,
        )
        return history

    def evaluate(self, X_test, y_test):
        """Evaluate on held-out data.

        Returns:
            ``(test_loss, test_accuracy)`` as reported by ``model.evaluate``.
        """
        test_loss, test_accuracy = self.model.evaluate(X_test, y_test)
        return test_loss, test_accuracy

# Visualization functions
def plot_training_history(history):
    """Show training vs. validation accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` attribute maps the keys
            ``accuracy``, ``val_accuracy``, ``loss`` and ``val_loss`` to
            per-epoch value sequences (as returned by the model's ``train``).
    """
    # One row per panel:
    # (subplot position, train key, validation key,
    #  train label, validation label, panel title, y-axis label)
    panels = (
        (1, 'accuracy', 'val_accuracy',
         'Train Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
        (2, 'loss', 'val_loss',
         'Train Loss', 'Validation Loss', 'Model Loss', 'Loss'),
    )

    plt.figure(figsize=(12, 6))
    for position, train_key, val_key, train_label, val_label, title, y_label in panels:
        plt.subplot(1, 2, position)
        plt.plot(history.history[train_key], label=train_label)
        plt.plot(history.history[val_key], label=val_label)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

def plot_engagement_distribution(data):
    """Display a 50-bin histogram (with KDE overlay) of ``engagement_score``.

    Args:
        data: Frame-like object with an ``engagement_score`` column.
    """
    figure_size = (10, 6)
    plt.figure(figsize=figure_size)

    scores = data['engagement_score']
    sns.histplot(scores, bins=50, kde=True)

    # Annotate the current axes, then render.
    plt.title('Engagement Score Distribution')
    plt.xlabel('Engagement Score')
    plt.ylabel('Frequency')
    plt.show()

# Main driver function
def main():
    """Run the full pipeline: fetch, preprocess, train, evaluate and plot.

    Downloads the CSV, derives features, min-max scales them, trains a
    ``NewsFeedRecommendationModel`` on an 80/20 train/test split, prints the
    test accuracy and shows the training-history and engagement plots.
    """
    url = "https://www.kaggleusercontent.com/datasets/sheenabatra/facebook-data.csv"
    data = fetch_data(url)

    # Preprocess data
    data = preprocess_data(data)

    # Define features and target
    features = ['content_length', 'day_of_week', 'hour_of_day', 'engagement_score']
    target = 'likes'

    # NOTE(review): normalize_features fits its scaler on the whole dataset
    # before the split below, so test rows influence the scaling. Consider
    # fitting on the training split only.
    X = normalize_features(data, features)
    y = data[target]

    # The model ends in a sigmoid unit and is trained with binary
    # cross-entropy, which expects 0/1 labels. If the target column is not
    # already binary, label rows above the median as 1 and the rest as 0.
    if not y.isin([0, 1]).all():
        y = (y > y.median()).astype(int)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Instantiate and train the model
    model = NewsFeedRecommendationModel(input_dim=X_train.shape[1])
    history = model.train(X_train, y_train, epochs=50, batch_size=32)

    # Evaluate the model
    test_loss, test_accuracy = model.evaluate(X_test, y_test)
    print(f"Test Accuracy: {test_accuracy:.4f}")

    # Visualization
    plot_training_history(history)
    plot_engagement_distribution(data)

# Run the main driver function only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


2024-12-24 18:35:24.698237: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1123)>