# Import All Required Python Packages

In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Read the Content of the CSV File

In [3]:
# Write a cleaned copy of the raw CSV with every double quote (") removed.
with open("DataSampleA.csv") as raw_file:
	cleaned_text = raw_file.read().replace('"', '')

with open("new_data_sample.csv", "w") as cleaned_file:
	cleaned_file.write(cleaned_text)

# Load the cleaned, semicolon-separated copy into the DataFrame used by every
# cell below.
df = pd.read_csv('new_data_sample.csv', sep=";")


# Contact Type Analysis 

In [4]:
def draw_duration_hist(
	seconds_per_bin: int, 
):
	"""
	Draw the duration histograms for the marketing campaign calls

	Builds a 2x2 grid of histograms from the module-level ``df`` (columns
	``contact`` and ``duration`` are read). The top row shows the number of
	calls per duration bin and the bottom row the percentage of calls; the
	left column covers 'cellular' contacts and the right column 'telephone'
	contacts. The figure is displayed with ``fig.show()`` and nothing is
	returned.

	Parameters
	----------
	seconds_per_bin : int
		Specify the size of each bin (measured in seconds). Must be
		positive. The same step is used for the x-axis tick spacing.

	Raises
	------
	ValueError
		If ``seconds_per_bin`` is zero or negative.
	"""
	# Fail early with a clear message instead of a late range()/plotly error.
	if seconds_per_bin <= 0:
		raise ValueError(
			f"seconds_per_bin must be a positive number of seconds, "
			f"got {seconds_per_bin!r}"
		)

	# One entry per subplot column: (value in df['contact'], label, bar colour)
	contact_columns = (
		('cellular', 'Cellular', 'blue'),
		('telephone', 'Telephone', 'orange'),
	)
	# One entry per subplot row:
	# (title prefix, trace name, histnorm, hovertemplate, y-axis title)
	measure_rows = (
		(
			'Frequency', 'no of calls', '', '%{y}',
			"No of Calls for Each Duration Group",
		),
		(
			'Percentage', 'Percent of calls', 'percent', '%{y:.1f}%',
			"Percent of Calls for Each Duration Group",
		),
	)

	# Titles are listed row by row, matching make_subplots' fill order.
	fig = make_subplots(
		rows=2, cols=2,
		subplot_titles=[
			f"{title_prefix} of Calls ({label}) vs Duration (mins)"
			for title_prefix, _name, _norm, _hover, _y_title in measure_rows
			for _contact, label, _color in contact_columns
		],
		shared_yaxes=True,
	)

	durations_by_contact = {
		contact: df.loc[df['contact'] == contact, 'duration']
		for contact, _label, _color in contact_columns
	}

	# Every subplot shares the same x range and ticks so the panels are
	# directly comparable; ticks sit on bin edges and are labelled in minutes.
	max_duration = df['duration'].max()
	tick_values = list(range(0, max_duration, seconds_per_bin))
	tick_labels = [f"{value / 60:.1f}min" for value in tick_values]

	for row, (_prefix, trace_name, histnorm, hovertemplate, y_title) in enumerate(
		measure_rows, start=1
	):
		for col, (contact, label, color) in enumerate(contact_columns, start=1):
			durations = durations_by_contact[contact]
			fig.add_trace(go.Histogram(
				x=durations,
				name=trace_name,
				histnorm=histnorm,
				xbins=dict(
					start=0,
					end=durations.max(),
					size=seconds_per_bin
				),
				hovertemplate=hovertemplate,
				marker_color=color,
			), row=row, col=col)

			fig.update_xaxes(
				title_text=f"Call duration (mins) for Contact Type '{label}'",
				range=[0, max_duration],
				tickmode='array',
				tickvals=tick_values,
				ticktext=tick_labels,
				row=row,
				col=col
			)

		# Only the left-hand subplot of each row carries a y-axis title.
		fig.update_yaxes(
			title_text=y_title,
			row=row,
			col=1
		)

	fig.update_layout(
		title_text="Call Frequency vs Call Duration for Different Contact Methods", 
		showlegend=False,
		width=1000,
		height=900,
	)
	fig.show()

In [7]:
# Pie chart of summed call duration (in minutes) split by contact type.
temp_df = df.assign(duration_minutes=df['duration'] / 60)
fig = px.pie(
	temp_df,
	values='duration_minutes',
	names='contact',
).update_traces(textinfo='percent')
fig.show()

In [332]:
# Box plot of call duration (converted to minutes), one box per contact type.
temp_df = df.assign(duration_minutes=df['duration'] / 60)
fig = px.box(temp_df, x="duration_minutes", color="contact")
# quartilemethod may be "exclusive", "inclusive", or "linear" (the default)
fig.update_traces(quartilemethod="exclusive")

axis_titles = dict(
	xaxis_title="Previous Call Duration (minutes)",
	yaxis_title="Contact Type",
)
fig.update_layout(**axis_titles)
fig.show()

In [286]:
# Render the duration histograms with 120-second (2-minute) bins.
draw_duration_hist(seconds_per_bin=120)

# Relation Between Previous Call Duration and Total Number of Contacts

In [432]:
# Scatter call duration (minutes) against the combined contact count of the
# 'campaign' and 'previous' columns.
temp_df = df.assign(
	duration_minutes=df['duration'] / 60,
	total_no_contacts=df['campaign'] + df['previous'],
)

fig = px.scatter(temp_df, x='total_no_contacts', y='duration_minutes')
fig.update_layout(dict(
	xaxis_title="No. of Contacts in Current Campaign + Previous Campaigns",
	yaxis_title="Previous Call Duration (minutes)",
))
fig.show()

# Relation Between Average Annual Balance and Customer Interest in Marketing Campaign

In [308]:
# 3-D scatter: combined contact count (x), call duration in minutes (y) and
# balance (z); points are also coloured by balance.
temp_df = df.assign(
	duration_minutes=df['duration'] / 60,
	total_no_contacts=df['campaign'] + df['previous'],
)

fig = px.scatter_3d(
	temp_df,
	x='total_no_contacts',
	y='duration_minutes',
	z='balance',
	color='balance',
).update_layout(height=600)
fig.show()

# Education Level vs Average Annual Balance

In [336]:
# One horizontal box of 'balance' per 'education' level, coloured by level.
# quartilemethod may be "exclusive", "inclusive", or "linear" (the default).
fig = px.box(
	df,
	x="balance",
	y="education",
	color="education",
).update_traces(quartilemethod="exclusive")
fig.show()

# No. of Calls in Previous vs Current Campaign

In [337]:
# Scatter the 'previous' column against the 'campaign' column, one point per row.
fig = px.scatter(df, x="campaign", y="previous")
fig.update_layout(
	xaxis_title="# Calls in Current Campaign",
	# Fixed a stray "jj" typo at the end of this user-facing axis title.
	yaxis_title="# Calls in Previous Campaign"
)
fig.show()

<h1>
	<center>
		<b>Last Contact Duration, Total Frequency of Previous Contacts</b>
	</center>
</h1>
<h1>
	<center>
		<i>Vs</i>
	</center>
</h1>
<h1>
	<center>
		<b>Previous Outcome</b>
	</center>
</h1>

In [339]:
# Working copy with the derived columns; the plot itself only reads
# 'previous' and 'poutcome'.
temp_df = df.assign(
	total_no_contacts=df['previous'] + df['campaign'],
	duration_minutes=df['duration'] / 60,
)

# One box of 'previous' per 'poutcome' value.
fig = px.box(
	temp_df,
	x='previous',
	y='poutcome',
	color='poutcome',
).update_layout(
	xaxis_title="No. of Contacts in Previous Campaign",
	yaxis_title="Previous Campaign Outcome",
)
fig.show()

In [325]:
# Working copy with the derived columns; the plot reads 'duration_minutes'
# and 'poutcome'.
temp_df = df.assign(
	total_no_contacts=df['previous'] + df['campaign'],
	duration_minutes=df['duration'] / 60,
)

# One box of call duration (minutes) per 'poutcome' value.
fig = px.box(
	temp_df,
	x='duration_minutes',
	y='poutcome',
	color='poutcome',
).update_layout(
	xaxis_title='Last Call Duration (minutes)',
	yaxis_title="Previous Campaign Outcome",
)
fig.show()

# Analysing Last Date of Contact


In [431]:
# Calendar position (1-12) of each lower-case month abbreviation, used to
# order rows jan..dec rather than alphabetically.
custom_month_ordering = {
	month: position
	for position, month in enumerate(
		[
			'jan', 'feb', 'mar', 'apr', 'may', 'jun',
			'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
		],
		start=1,
	)
}

# Sorted copy of the data, rows ordered by calendar month.
temp_df = df.sort_values(
	by=['month'],
	key=lambda months: months.map(custom_month_ordering),
)

# Month x day grid holding the count of non-null 'age' entries per cell,
# with the month index put back into calendar order.
table = pd.pivot_table(
	temp_df,
	values='age',
	index=['month'],
	columns=['day'],
	aggfunc='count',
).sort_index(key=lambda months: months.map(custom_month_ordering))

heatmap_labels = {
	"x": "Last Contact Day",
	"y": "Last Contact Month",
	"color": "No of Customers Called",
}
fig = px.imshow(table, labels=heatmap_labels, color_continuous_scale="algae")
fig.show()