In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from datetime import datetime

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

![](https://www.growthsaloon.com/wp-content/uploads/2022/01/mavenani-01-e1641942385376-1024x361.png)

### **Recommended Analysis**


**1. Are there any null values or outliers? How will you handle them?**

**2. What factors are significantly related to the number of web purchases?**

**3. Which marketing campaign was the most successful?**

**4. What does the average customer look like?**

**5. Which products are performing best?**

**6. Which channels are underperforming?**


In [None]:
# Load the marketing dataset from the Kaggle input directory into a DataFrame
DATA_PATH = "/kaggle/input/marketing-dataset/marketing_data.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview only the first rows instead of rendering the whole DataFrame as cell output
df.head()

In [None]:
# Number of rows and columns, as a (rows, columns) tuple
df.shape

In [None]:
# Summarize each column's dtype and non-null count
df.info()

In [None]:
# Count the missing (NaN) values in each column
df.isna().sum()

In [None]:
# Strip leading/trailing whitespace from every column name.
# Rebinding `df` to the renamed frame (rather than mutating with inplace=True)
# keeps the cell chainable and makes the data flow explicit.
df = df.rename(columns=str.strip)

In [None]:
# List the column names to confirm the surrounding whitespace was stripped
df.columns

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the DataFrame's columns
df.describe()

In [None]:
# Mean of the 'Income' column; missing values are skipped by default and no rounding is applied
mean_income = df['Income'].mean()

In [None]:
# Impute missing 'Income' entries with the previously computed column mean
df['Income'] = df['Income'].fillna(value=mean_income)

In [None]:
# Count the nulls remaining in 'Income' after imputation
df['Income'].isna().sum()

#### 1. Are there any null values or outliers? How will you handle them?


**"Yes. The dataset contains 24 null (blank) values in the 'Income' column. We handle them by replacing each one with the column mean."**

#### 2. What factors are significantly related to the number of web purchases?


In [None]:
# Reference year for the age calculation, read from the system clock.
# NOTE(review): ages therefore depend on the date the notebook is run.
current_year = datetime.today().year

# Age is the reference year minus each customer's birth year
birth_years = df['Year_Birth']
df['Age'] = current_year - birth_years

In [None]:
# Split customer ages into 6 equal-width intervals
age_bins = pd.cut(df['Age'], bins=6)

# Total web purchases per age interval.
# observed=False is passed explicitly: grouping by a categorical without it
# emits a FutureWarning in recent pandas, and the future default (True) would
# silently drop empty intervals from the result.
by_age = df.groupby(age_bins, observed=False)['NumWebPurchases'].sum()
by_age


In [None]:
# Split incomes into 6 equal-width intervals
bins = pd.cut(df['Income'], bins=6)

# Total web purchases per income interval.
# observed=False is passed explicitly: grouping by a categorical without it
# emits a FutureWarning in recent pandas, and the future default (True) would
# silently drop empty intervals from the result.
Income = df.groupby(bins, observed=False)['NumWebPurchases'].sum()
Income

In [None]:
# Total web purchases for each education level
purchases_by_education = df.groupby('Education')['NumWebPurchases']
Education = purchases_by_education.sum()
Education

In [None]:
# Total web purchases for each country
purchases_by_country = df.groupby('Country')['NumWebPurchases']
Country = purchases_by_country.sum()
Country

#### 3. Which marketing campaign was the most successful?


In [None]:
# Campaign indicator columns whose totals are compared
campaign_columns = [
    'AcceptedCmp1', 'AcceptedCmp2', 'AcceptedCmp3',
    'AcceptedCmp4', 'AcceptedCmp5', 'Response',
]

# Sum each column on its own, keyed by column name (insertion order is kept)
campaign_totals = {column: df[column].sum() for column in campaign_columns}

# Expose each total under its own variable name as well
(sum_accepted_cmp1, sum_accepted_cmp2, sum_accepted_cmp3,
 sum_accepted_cmp4, sum_accepted_cmp5, sum_response) = campaign_totals.values()

# Print results one by one
for column, total in campaign_totals.items():
    print(f"{column}: {total}")

**"The most successful campaign was the one recorded in the 'Response' column, with 334 positive responses."**

#### 4. What does the average customer look like?

In [None]:
# Average customer age
df['Age'].mean()

In [None]:
# Histogram of customer ages in 10 bins, drawn on an explicit Axes
fig, ax = plt.subplots()
ax.hist(df['Age'], bins=10, edgecolor='black')

# Title and axis labels
ax.set_title('Age Distribution')
ax.set_xlabel('Age')
ax.set_ylabel('Frequency')

# Render the figure
plt.show()


In [None]:
# Number of customers at each education level
df['Education'].value_counts()

In [None]:
# Customers per education level
education_counts = df['Education'].value_counts()

# Bar chart of the counts on an explicit 8x6 figure
fig, ax = plt.subplots(figsize=(8, 6))
education_counts.plot.bar(ax=ax, color='orange')

ax.set_title('Education Level Distribution')
ax.set_xlabel('Education Level')
ax.set_ylabel('Count')

plt.show()

#### 5. Which products are performing best?


In [None]:
# Amount-spent columns, one per product category
product_columns = [
    'MntWines',
    'MntFruits',
    'MntMeatProducts',
    'MntFishProducts',
    'MntSweetProducts',
    'MntGoldProds',
]

# Total per category, largest first
product_totals = df[product_columns].sum()
sum_products = product_totals.sort_values(ascending=False)
sum_products

In [None]:
# Bar chart of the per-category totals on an explicit 10x6 figure
fig, ax = plt.subplots(figsize=(10, 6))
sum_products.plot.bar(ax=ax, color='skyblue')

ax.set_title('Total Sales of Product Categories')
ax.set_xlabel('Product Categories')
ax.set_ylabel('Total Sales')

plt.show()

#### 6. Which channels are underperforming?

In [None]:
# Per-channel count columns to compare.
# NOTE(review): 'NumWebVisitsMonth' looks like a visit count rather than a
# purchase count - confirm it belongs in the same comparison as the others.
channel_columns = [
    'NumDealsPurchases',
    'NumWebPurchases',
    'NumCatalogPurchases',
    'NumStorePurchases',
    'NumWebVisitsMonth',
]

# Total per channel, largest first
channel_totals = df[channel_columns].sum()
sum_channels = channel_totals.sort_values(ascending=False)
sum_channels

In [None]:
# Bar chart of the per-channel totals on an explicit 10x6 figure
fig, ax = plt.subplots(figsize=(10, 6))
sum_channels.plot.bar(ax=ax, color='blue')

# The title and axis labels describe channels: the previous text was carried
# over from the product-category chart and mislabelled this figure.
ax.set_title('Total Counts by Channel')
ax.set_xlabel('Channel')
ax.set_ylabel('Total Count')

plt.show()

### Conclusion

Maven Marketing should prioritize data-driven decision-making to optimize its marketing strategies. Handling null values and outliers and understanding the factors that influence web purchases are critical first steps. Identifying successful campaigns, customer profiles, and top-performing products, and improving underperforming channels, will enhance overall marketing effectiveness and customer engagement. Regular monitoring and iterative improvements based on these insights will be key to sustained success in a competitive marketing landscape.