# 🏠 Load libraries & data 🏠

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from warnings import filterwarnings

# Silence every warning for the rest of the session: no category or module
# filter is given, so deprecation notices from libraries are hidden as well.
filterwarnings("ignore")
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
# Read the campaign records from the CSV file in the working directory.
df = pd.read_csv("marketing_campaign_dataset.csv")

# 📃 Basic Analysis 📃

In [3]:
# (rows, columns) of the loaded DataFrame
df.shape

(200000, 16)

In [4]:
# Column labels of the loaded DataFrame
df.columns

Index(['Campaign_ID', 'Company', 'Campaign_Type', 'Target_Audience',
       'Duration', 'Channel_Used', 'Conversion_Rate', 'Acquisition_Cost',
       'ROI', 'Location', 'Language', 'Clicks', 'Impressions',
       'Engagement_Score', 'Customer_Segment', 'Date'],
      dtype='object')

In [5]:
# Per-column non-null counts and dtypes, before any cleaning
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200000 entries, 0 to 199999
Data columns (total 16 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   Campaign_ID       200000 non-null  int64  
 1   Company           200000 non-null  object 
 2   Campaign_Type     200000 non-null  object 
 3   Target_Audience   200000 non-null  object 
 4   Duration          200000 non-null  object 
 5   Channel_Used      200000 non-null  object 
 6   Conversion_Rate   200000 non-null  float64
 7   Acquisition_Cost  200000 non-null  object 
 8   ROI               200000 non-null  float64
 9   Location          200000 non-null  object 
 10  Language          200000 non-null  object 
 11  Clicks            200000 non-null  int64  
 12  Impressions       200000 non-null  int64  
 13  Engagement_Score  200000 non-null  int64  
 14  Customer_Segment  200000 non-null  object 
 15  Date              200000 non-null  object 
dtypes: float64(2), int64(4), object(10)

In [6]:
# First five rows
df.head()

Unnamed: 0,Campaign_ID,Company,Campaign_Type,Target_Audience,Duration,Channel_Used,Conversion_Rate,Acquisition_Cost,ROI,Location,Language,Clicks,Impressions,Engagement_Score,Customer_Segment,Date
0,1,Innovate Industries,Email,Men 18-24,30 days,Google Ads,0.04,"$16,174.00",6.29,Chicago,Spanish,506,1922,6,Health & Wellness,2021-01-01
1,2,NexGen Systems,Email,Women 35-44,60 days,Google Ads,0.12,"$11,566.00",5.61,New York,German,116,7523,7,Fashionistas,2021-01-02
2,3,Alpha Innovations,Influencer,Men 25-34,30 days,YouTube,0.07,"$10,200.00",7.18,Los Angeles,French,584,7698,1,Outdoor Adventurers,2021-01-03
3,4,DataTech Solutions,Display,All Ages,60 days,YouTube,0.11,"$12,724.00",5.55,Miami,Mandarin,217,1820,7,Health & Wellness,2021-01-04
4,5,NexGen Systems,Email,Men 25-34,15 days,YouTube,0.05,"$16,452.00",6.5,Los Angeles,Mandarin,379,4201,3,Health & Wellness,2021-01-05


In [7]:
# Last five rows
df.tail()

Unnamed: 0,Campaign_ID,Company,Campaign_Type,Target_Audience,Duration,Channel_Used,Conversion_Rate,Acquisition_Cost,ROI,Location,Language,Clicks,Impressions,Engagement_Score,Customer_Segment,Date
199995,199996,TechCorp,Display,All Ages,30 days,Google Ads,0.06,"$18,365.00",2.84,Chicago,German,858,5988,1,Tech Enthusiasts,2021-12-07
199996,199997,DataTech Solutions,Email,Men 25-34,15 days,Facebook,0.02,"$8,168.00",4.14,Chicago,Spanish,228,3068,7,Foodies,2021-12-08
199997,199998,DataTech Solutions,Social Media,Men 18-24,45 days,Website,0.05,"$13,397.00",3.25,New York,Mandarin,723,9548,3,Tech Enthusiasts,2021-12-09
199998,199999,Innovate Industries,Influencer,All Ages,30 days,YouTube,0.1,"$18,508.00",3.86,Houston,French,528,2763,1,Foodies,2021-12-10
199999,200000,Innovate Industries,Social Media,Women 35-44,45 days,Google Ads,0.01,"$13,835.00",6.64,Chicago,Spanish,924,7287,8,Tech Enthusiasts,2021-12-11


# 🧩 Data Preparation 🧩

In [8]:
# Strip the currency formatting from 'Acquisition_Cost' (e.g. "$16,174.00" -> 16174.0)
# and convert the column to float.
# regex=False forces literal replacement: '$' is a regex end-of-string anchor, and
# the default value of `regex` differs between pandas 1.x and 2.x, so being explicit
# keeps the result identical on every version.
df['Acquisition_Cost'] = (
    df['Acquisition_Cost']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
)

In [9]:

# Convert 'Duration' (e.g. "30 days") to an integer number of days.
# The pattern is a raw string so that `\d` reaches the regex engine untouched instead
# of being parsed as an invalid Python string escape (a warning on recent Pythons).
# expand=False makes extract() return a Series rather than a one-column DataFrame,
# which is the natural shape for a single-column assignment.
df['Duration'] = df['Duration'].str.extract(r'(\d+)', expand=False).astype(int)

In [10]:
# Parse the 'Date' strings into pandas datetime values
date_strings = df['Date']
df['Date'] = pd.to_datetime(date_strings)

In [11]:
# Count the missing values in each column
missing_per_column = df.isnull().sum()
missing_per_column

Campaign_ID         0
Company             0
Campaign_Type       0
Target_Audience     0
Duration            0
Channel_Used        0
Conversion_Rate     0
Acquisition_Cost    0
ROI                 0
Location            0
Language            0
Clicks              0
Impressions         0
Engagement_Score    0
Customer_Segment    0
Date                0
dtype: int64

In [12]:
# Summary statistics (count, mean, min, quartiles, max, std) for the
# numeric and datetime columns
df.describe()

Unnamed: 0,Campaign_ID,Duration,Conversion_Rate,Acquisition_Cost,ROI,Clicks,Impressions,Engagement_Score,Date
count,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000
mean,100000.5,37.503975,0.08007,12504.39304,5.002438,549.77203,5507.30152,5.49471,2021-07-01 23:35:09.600000
min,1.0,15.0,0.01,5000.0,2.0,100.0,1000.0,1.0,2021-01-01 00:00:00
25%,50000.75,30.0,0.05,8739.75,3.5,325.0,3266.0,3.0,2021-04-02 00:00:00
50%,100000.5,30.0,0.08,12496.5,5.01,550.0,5517.5,5.0,2021-07-02 00:00:00
75%,150000.25,45.0,0.12,16264.0,6.51,775.0,7753.0,8.0,2021-10-01 00:00:00
max,200000.0,60.0,0.15,20000.0,8.0,1000.0,10000.0,10.0,2021-12-31 00:00:00
std,57735.171256,16.74672,0.040602,4337.664545,1.734488,260.019056,2596.864286,2.872581,


In [13]:
# Unique values of 'Company'
# The printed label now names the column actually inspected; it previously said
# "Campaign Types", copied from the next cell.
# The variable name is kept unchanged in case later cells reference it.
Company_types = df['Company'].unique()
print("Unique Companies:", Company_types)

Unique Campaign Types: ['Innovate Industries' 'NexGen Systems' 'Alpha Innovations'
 'DataTech Solutions' 'TechCorp']


In [14]:
# Distinct campaign types found in the 'Campaign_Type' column
campaign_types = pd.unique(df['Campaign_Type'])
print("Unique Campaign Types:", campaign_types)

Unique Campaign Types: ['Email' 'Influencer' 'Display' 'Search' 'Social Media']


In [15]:
# Distinct audiences found in the 'Target_Audience' column
target_audiences = pd.unique(df['Target_Audience'])
print("Unique Target Audiences:", target_audiences)

Unique Target Audiences: ['Men 18-24' 'Women 35-44' 'Men 25-34' 'All Ages' 'Women 25-34']


In [16]:

# Distinct channels found in the 'Channel_Used' column
channels_used = pd.unique(df['Channel_Used'])
print("Unique Channels Used:", channels_used)

Unique Channels Used: ['Google Ads' 'YouTube' 'Instagram' 'Website' 'Facebook' 'Email']


In [17]:
# Distinct languages found in the 'Language' column
languages = pd.unique(df['Language'])
print("Unique Languages:", languages)

Unique Languages: ['Spanish' 'German' 'French' 'Mandarin' 'English']


In [18]:
# Distinct segments found in the 'Customer_Segment' column
customer_segments = pd.unique(df['Customer_Segment'])
print("Unique Customer Segments:", customer_segments)

Unique Customer Segments: ['Health & Wellness' 'Fashionistas' 'Outdoor Adventurers' 'Foodies'
 'Tech Enthusiasts']


In [19]:
# Convert the text columns below to pandas' 'category' dtype.
# One loop over a column list replaces six copy-pasted assignments (the first of
# which was commented as 'Campaign_Type' although it converted 'Company').
# Note: 'Location' is not in the list and therefore keeps its object dtype.
categorical_columns = [
    'Company',
    'Campaign_Type',
    'Target_Audience',
    'Channel_Used',
    'Language',
    'Customer_Segment',
]

for column in categorical_columns:
    df[column] = df[column].astype('category')

In [20]:
# Re-inspect the dtypes after the conversions in the preceding cells
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200000 entries, 0 to 199999
Data columns (total 16 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   Campaign_ID       200000 non-null  int64         
 1   Company           200000 non-null  category      
 2   Campaign_Type     200000 non-null  category      
 3   Target_Audience   200000 non-null  category      
 4   Duration          200000 non-null  int32         
 5   Channel_Used      200000 non-null  category      
 6   Conversion_Rate   200000 non-null  float64       
 7   Acquisition_Cost  200000 non-null  float64       
 8   ROI               200000 non-null  float64       
 9   Location          200000 non-null  object        
 10  Language          200000 non-null  category      
 11  Clicks            200000 non-null  int64         
 12  Impressions       200000 non-null  int64         
 13  Engagement_Score  200000 non-null  int64         
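 14  Customer_Segment  200000 non-null  category      
 15  Date              200000 non-null  datetime64[ns]
dtypes: category(6), datetime64[ns](1), float64(3), int32(1), int64(4), object(1)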

As you can see, the columns **Company, Campaign_Type, Target_Audience, Channel_Used, Language, and Customer_Segment** are now shown with the `category` data type in the DataFrame's information summary.

This conversion helps optimize memory usage and enables more efficient operations on categorical data.