In [7]:
import pandas as pd

df = pd.read_csv('website-landings.csv')

# Keep rows where either 'Source' or 'Campaign Type' is not NaN
filtered_df = df[df['Source'].notna() | df['Campaign Type'].notna()]

# Save the filtered DataFrame
filtered_df.to_csv('filtered_landing.csv', index=False)



In [None]:
df = pd.read_csv('filtered_landing.csv')
unique_sources = df['Source'].unique()
print(unique_sources)

In [None]:
sources_to_keep = ['google', 'youtube', 'bing', 'gmail', 'instagram', 'facebook', 'linkedin', 'skype']

filtered_df = df[df['Source'].isin(sources_to_keep)]

In [None]:
condition = filtered_df['Source'].isin(['facebook', 'instagram'])

filtered_df.loc[condition, 'Campaign Type'] = 'Meta'

In [None]:
filtered_df.to_csv('last.csv', index=False)

In [None]:
df.replace("", pd.NA, inplace=True)
cleaned_df = df.dropna(subset=['Campaign Type'])

cleaned_df.to_csv('final_last.csv', index=False)

In [5]:
import pandas as pd
from collections import defaultdict
df = pd.read_csv('final_last.csv')
df['Website Landing Time'] = pd.to_datetime(df['Website Landing Time'])
df = df.sort_values(['User Id', 'Website Landing Time'])

credit_scores = defaultdict(float)
touch_points = defaultdict(int)

campaign_types = [
    'Cross-network',
    'Search Network',
    'Display Network',
    'Search & content',
    'Shopping',
    'Meta'
]
decay_factor = 0.9

for user_id, user_df in df.groupby('User Id'):
    user_journey = user_df.sort_values('Website Landing Time')
    conversion_index = user_journey.index[user_journey['Is Converted'] == 1].tolist()
    
    if conversion_index:
        conversion_point = conversion_index[0]
        relevant_journey = user_journey.loc[:conversion_point]
        journey_length = len(relevant_journey)
        
        for i, (_, row) in enumerate(relevant_journey.iterrows()):
            weight = decay_factor ** (journey_length - i - 1)
            credit_scores[row['Campaign Type']] += weight
            touch_points[row['Campaign Type']] += 1
    else:
        for _, row in user_journey.iterrows():
            credit_scores[row['Campaign Type']] += 0.1
            touch_points[row['Campaign Type']] += 1

# final scores (normalized to 0-1 range)
total_credit = sum(credit_scores.values())
final_scores = {}

for campaign_type in campaign_types:
    if touch_points[campaign_type] > 0:
        normalized_credit = credit_scores[campaign_type] / total_credit if total_credit > 0 else 0
        final_scores[campaign_type] = min(normalized_credit, 1.0)
    else:
        final_scores[campaign_type] = 0.0

print("Final Credit Scores:")
for campaign_type in campaign_types:
    print(f"{campaign_type}: {final_scores[campaign_type]:.4f}")

print("Raw Credit Scores:", dict(credit_scores))
print("Touch Points:", dict(touch_points))

Final Credit Scores:
Cross-network: 0.5870
Search Network: 0.1998
Display Network: 0.0159
Search & content: 0.0275
Shopping: 0.0616
meta: 0.1083

Debug Information:
Raw Credit Scores: {'Cross-network': 60994.748754183136, 'meta': 11250.962195956181, 'Search Network': 20760.86759770484, 'Shopping': 6398.718518679385, 'Search & content': 2856.4586368380997, 'Display Network': 1655.689141309085}
Touch Points: {'Cross-network': 433945, 'meta': 102456, 'Search Network': 147457, 'Shopping': 44550, 'Search & content': 19731, 'Display Network': 11776}


In [6]:
results_df = pd.DataFrame({
    'Campaign Type': campaign_types,
    'Credit Score': [final_scores[ct] for ct in campaign_types]
})

# Save the results to a new CSV file
output_file = 'campaign_credit_scores.csv'
results_df.to_csv(output_file, index=False)