## EDA for `campaigns.csv`

This notebook performs exploratory data analysis (EDA) on `campaigns.csv` to gain insights into the social media ad campaigns.

In [1]:
# import necessary libraries 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [4]:
# load campaigns.csv from the data/ folder under the current working directory
# (os.path.join builds the path with the platform's separator instead of a
# hard-coded "/")
path = os.getcwd()
campaigns = pd.read_csv(os.path.join(path, "data", "campaigns.csv"))

# display first few rows to confirm data loaded properly
campaigns.head()

Unnamed: 0,campaign_id,name,start_date,end_date,duration_days,total_budget
0,1,Campaign_1_Launch,2025-05-25,2025-07-23,59,24021.32
1,2,Campaign_2_Launch,2025-04-16,2025-07-07,82,79342.41
2,3,Campaign_3_Winter,2025-05-04,2025-06-29,56,14343.25
3,4,Campaign_4_Summer,2025-06-04,2025-08-08,65,45326.6
4,5,Campaign_5_Launch,2025-07-11,2025-08-28,48,68376.69


In [7]:
# count the NaN entries in each column and report them
missing_per_column = campaigns.isna().sum()
print(f"Number of missing values in campaigns\n{missing_per_column}\n")

# summarise the dtype and non-null count of every column
campaigns.info()

Number of missing values in campaigns
campaign_id      0
name             0
start_date       0
end_date         0
duration_days    0
total_budget     0
dtype: int64

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   campaign_id    50 non-null     int64  
 1   name           50 non-null     object 
 2   start_date     50 non-null     object 
 3   end_date       50 non-null     object 
 4   duration_days  50 non-null     int64  
 5   total_budget   50 non-null     float64
dtypes: float64(1), int64(2), object(3)
memory usage: 2.5+ KB


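With no missing entries, we can proceed with processing the data. Note that `start_date` and `end_date` were read as `object` (string) columns, so they are converted to datetimes in the next step.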

In [11]:
# Clean the campaigns frame: parse dates, tidy names, derive month/year
# columns and index the rows by campaign_id. Every step is written so that
# re-running this cell on an already-processed frame gives the same result.

# change start_date and end_date to pd datetime objects
campaigns['start_date'] = pd.to_datetime(campaigns['start_date'])
campaigns['end_date'] = pd.to_datetime(campaigns['end_date'])

# replace underscores in name column with spaces
# (regex=False: '_' is matched literally, independent of the pandas version's
# default for the regex argument)
campaigns['name'] = campaigns['name'].str.replace('_', ' ', regex=False)

# extract month from start_date and end_date
campaigns['start_month'] = campaigns['start_date'].dt.month
campaigns['end_month'] = campaigns['end_date'].dt.month

# extract year from start_date and end_date
campaigns['start_year'] = campaigns['start_date'].dt.year
campaigns['end_year'] = campaigns['end_date'].dt.year

# replace index with campaign_id
# (guarded: once campaign_id has become the index it is no longer a column,
# so an unconditional set_index would raise KeyError on a second run)
if "campaign_id" in campaigns.columns:
    campaigns = campaigns.set_index("campaign_id")

# show new results
campaigns.head()

Unnamed: 0_level_0,name,start_date,end_date,duration_days,total_budget,start_month,end_month,start_year,end_year
campaign_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,Campaign 1 Launch,2025-05-25,2025-07-23,59,24021.32,5,7,2025,2025
2,Campaign 2 Launch,2025-04-16,2025-07-07,82,79342.41,4,7,2025,2025
3,Campaign 3 Winter,2025-05-04,2025-06-29,56,14343.25,5,6,2025,2025
4,Campaign 4 Summer,2025-06-04,2025-08-08,65,45326.6,6,8,2025,2025
5,Campaign 5 Launch,2025-07-11,2025-08-28,48,68376.69,7,8,2025,2025
