# Feature Engineering and Metrics

# In [4]:
import pandas as pd
import numpy as np

# Load datasets
# Read the four input tables from CSV files (paths are relative to the
# current working directory), in the order the names appear on the left.
influencers, posts, tracking_data, payouts = (
    pd.read_csv(csv_name)
    for csv_name in (
        "influencers.csv",
        "posts.csv",
        "tracking_data.csv",
        "payouts.csv",
    )
)

# --- Compute Engagement Rate ---
# Per-post engagement rate = (likes + comments) / reach.
# A reach of 0 would make the division yield inf (or NaN for 0/0), and a single
# inf would turn the influencer's whole average into inf. Mask zero reach to
# NaN so such posts are left out of the average (mean() skips NaN by default).
posts['engagement_rate'] = (
    (posts['likes'] + posts['comments'])
    / posts['reach'].where(posts['reach'] != 0)
)

# One row per influencer: the mean engagement rate across that influencer's posts.
engagement_df = posts.groupby('influencer_id')['engagement_rate'].mean().reset_index()

# --- Compute Total Revenue per Influencer ---
# Sum the `revenue` column per influencer and expose it as `total_revenue`,
# with `influencer_id` returned as a regular column.
revenue_df = (
    tracking_data
    .groupby('influencer_id')
    .agg(total_revenue=('revenue', 'sum'))
    .reset_index()
)

# --- Merge with Payouts to calculate ROAS ---
# Start from the payouts table and left-join revenue, engagement and the
# influencers table onto it, so every payout row is kept even when the other
# tables have no matching influencer (those cells come out as NaN).
# The influencers table is keyed by `ID` rather than `influencer_id`.
merged = (
    payouts
    .merge(revenue_df, how='left', on='influencer_id')
    .merge(engagement_df, how='left', on='influencer_id')
    .merge(influencers, how='left', left_on='influencer_id', right_on='ID')
)

# Fill NaN revenue or engagement with 0
# Rows with no match in the revenue / engagement tables come out of the left
# joins as NaN. Assign the filled frame back instead of calling
# fillna(inplace=True) on a column selection: that chained in-place form emits
# a FutureWarning in pandas 2.x and does not modify `merged` at all once
# Copy-on-Write is active.
merged = merged.fillna({'total_revenue': 0, 'engagement_rate': 0})

# --- Calculate Metrics ---
# ROAS = total revenue / total payout.
# A zero payout would make the division yield inf (or NaN for 0/0), and inf
# rows would rank first when the result is sorted by ROAS in descending order.
# Mask zero payouts to NaN so ROAS is NaN ("undefined") for those rows instead.
merged['ROAS'] = (
    merged['total_revenue']
    / merged['total_payout'].where(merged['total_payout'] != 0)
)

# The 1e-6 terms keep these two ratios finite when `orders` or `total_payout`
# is 0.
merged['revenue_per_order'] = merged['total_revenue'] / (merged['orders'] + 1e-6)
merged['payout_efficiency'] = merged['total_revenue'] / (merged['total_payout'] + 1e-6)

# Clean output
# Keep only the reporting columns, then rank rows from highest to lowest ROAS.
# NOTE(review): `revenue_per_order` is computed on `merged` but is not among
# the exported columns — confirm whether that omission is intended.
metrics_df = merged.loc[:, [
    'influencer_id',
    'name', 'platform', 'category', 'gender', 'follower_count', 'tier',
    'basis', 'rate', 'orders',
    'total_payout', 'total_revenue', 'ROAS',
    'engagement_rate', 'payout_efficiency',
]]
metrics_df = metrics_df.sort_values('ROAS', ascending=False)

# Save result
# Write the table without the DataFrame index, then confirm on stdout.
metrics_df.to_csv(path_or_buf="influencer_metrics.csv", index=False)
print("influencer_metrics.csv has been saved.")

# Output: influencer_metrics.csv has been saved.
