In [None]:
import numpy as np
import pandas as pd

# Display floating-point values with two decimal places in rendered output.
pd.options.display.precision = 2

In [None]:
# Read the churn dataset from the working directory and preview the first rows.
df = pd.read_csv(filepath_or_buffer="telecom_churn.csv")
df.head(n=5)

In [None]:
# (rows, columns) of the frame; a bare last expression is displayed by the notebook,
# so no print() wrapper is needed.
df.shape

In [None]:
# Column labels of the frame, shown through the notebook's own display of the
# cell's last expression rather than print().
df.columns

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# must not be wrapped in print() (that would add a stray "None" line).
df.info()

In [None]:
# Store the churn flag as a 64-bit integer so it can be averaged and counted numerically.
df = df.astype({"churn": "int64"})

In [None]:
# Display the churn column as a Series.
df.loc[:, "churn"]

In [None]:
# Summary statistics with the default column selection (include=None).
df.describe(include=None)

In [None]:
# Summary statistics restricted to the object and boolean columns:
# select those columns first, then describe what is left.
df.select_dtypes(include=["object", "bool"]).describe()

In [None]:
# Absolute number of rows per churn value.
df.churn.value_counts()

In [None]:
# Share of rows per churn value (fractions summing to 1).
df.churn.value_counts(normalize=True)

In [None]:
# Five rows with the highest "total day charge": sort descending, keep the first five.
df.sort_values("total day charge", ascending=False).iloc[:5]

In [None]:
# Order by churn ascending, then by "total day charge" descending within each
# churn value, and preview the first five rows.
df.sort_values(
    by=["churn", "total day charge"],
    ascending=[True, False],
).head(n=5)

In [None]:
# Mean of the churn column.
df["churn"].agg("mean")

In [None]:
# What are average values of numerical features for churned users?
# numeric_only=True restricts the mean to numeric columns. pandas >= 2.0 no longer
# drops text columns (e.g. "state") silently and raises TypeError without it.
df[df["churn"] == 1].mean(numeric_only=True)

In [None]:
# Average "total day minutes" among churned users (churn == 1).
# Row filter and column selection are done in a single .loc lookup.
df.loc[df["churn"] == 1, "total day minutes"].mean()

In [None]:
# Largest "total intl minutes" among users who did not churn and whose
# "international plan" value is "no".
df.loc[
    (df["churn"] == 0) & (df["international plan"] == "no"),
    "total intl minutes",
].max()

In [None]:
# Label-based selection: .loc slices include both endpoints, so this takes index
# labels 0 through 5 and every column from "state" through "area code".
df.loc[slice(0, 5), slice("state", "area code")]

In [None]:
# Position-based selection: first five rows and first three columns
# (the end of an .iloc slice is exclusive).
df.iloc[:5, :3]

In [None]:
# Last row of the frame, returned as a one-row DataFrame.
df.tail(1)

In [None]:
# Apply np.max to every column (axis="index" runs the function once per column).
df.apply(np.max, axis="index")

In [None]:
# Rows whose state value begins with "W". The vectorised .str accessor replaces
# the per-element lambda: unlike state[0] it does not raise on empty strings, and
# na=False treats missing values as non-matches instead of failing.
df[df["state"].str.startswith("W", na=False)].head()

In [None]:
# Lookup table turning the textual plan flags into booleans.
d = dict(no=False, yes=True)
# Series.map substitutes each value by its entry in d; values that are not keys
# of d become NaN, so this cell should run only once per loaded frame.
df["international plan"] = df["international plan"].map(d)
df.head(5)

In [None]:
# Same conversion done with DataFrame.replace: the nested dict limits the
# replacement to the "voice mail plan" column and reuses the lookup table d.
df = df.replace(to_replace={"voice mail plan": d})
df.head(n=5)

In [None]:
# Columns summarised per churn group (this list is reused by a later cell).
columns_to_show = [
    "total day minutes",
    "total eve minutes",
    "total night minutes",
]

# Per-churn-group summary statistics; percentiles=[] requests no extra percentiles.
df.groupby(by=["churn"])[columns_to_show].describe(percentiles=[])

In [None]:
# Mean, standard deviation, minimum and maximum of the selected columns per churn
# group. Aggregations are named by string: passing NumPy callables (np.mean, ...)
# to agg is deprecated in recent pandas and can produce version-dependent column
# labels, whereas the string names always yield "mean", "std", "min", "max".
df.groupby(["churn"])[columns_to_show].agg(["mean", "std", "min", "max"])

In [None]:
# Contingency table of counts: churn values as rows, international plan values as columns.
pd.crosstab(index=df["churn"], columns=df["international plan"])

In [None]:
# Contingency table of churn vs. voice mail plan; normalize=True divides every
# cell by the grand total so the cells sum to 1.
pd.crosstab(
    index=df["churn"],
    columns=df["voice mail plan"],
    normalize=True,
)

In [None]:
# Mean number of day, evening and night calls for each area code.
df.pivot_table(
    values=["total day calls", "total eve calls", "total night calls"],
    index=["area code"],
    aggfunc="mean",
)

In [None]:
# Row-wise sum of the four call-count columns.
total_calls = (
    df["total day calls"]
    .add(df["total eve calls"])
    .add(df["total night calls"])
    .add(df["total intl calls"])
)
# DataFrame.insert puts the new column at position `loc`; df.shape[1] is the
# current number of columns, so "total calls" lands at the very end of the frame.
df.insert(loc=df.shape[1], column="total calls", value=total_calls)
df.head(5)

In [None]:
# Plain column assignment appends "total charge" (the row-wise sum of the four
# charge columns) as a new last column.
df["total charge"] = (
    df["total day charge"]
    .add(df["total eve charge"])
    .add(df["total night charge"])
    .add(df["total intl charge"])
)
df.head(n=5)

In [None]:
# get rid of just created columns; reassigning the result replaces inplace=True,
# and columns= states the axis explicitly
df = df.drop(columns=["total charge", "total calls"])
# and here's how you can delete rows: drop() returns a new frame without index
# labels 1 and 2, while df itself keeps them because the result is not assigned
df.drop(index=[1, 2]).head()

In [None]:
# First five rows of the current frame.
df.iloc[:5]

In [None]:
# Churn vs. international plan counts; margins=True appends the "All" row and
# column holding the totals.
pd.crosstab(
    index=df["churn"],
    columns=df["international plan"],
    margins=True,
)

In [None]:
# Plotting setup: matplotlib's pyplot interface and seaborn
import matplotlib.pyplot as plt
# seaborn is a third-party package; if it is missing, install it with: pip install seaborn
import seaborn as sns

# Render inline figures in 'retina' format, which is sharper and more legible
%config InlineBackend.figure_format = 'retina'

In [None]:
# Bar chart of row counts per international plan value, split by churn.
# The trailing semicolon suppresses the textual repr of the returned object.
sns.countplot(data=df, x="international plan", hue="churn");

In [None]:
# Churn vs. number of customer service calls, with totals in the "All" margins.
pd.crosstab(
    index=df["churn"],
    columns=df["customer service calls"],
    margins=True,
)

In [None]:
# Bar chart of row counts per number of customer service calls, split by churn.
# The trailing semicolon suppresses the textual repr of the returned object.
sns.countplot(data=df, x="customer service calls", hue="churn");

In [None]:
# Indicator column: 1 when a customer made more than 3 service calls, else 0.
df["many service calls"] = df["customer service calls"].gt(3).astype("int")

# Counts of the new flag against churn, with totals in the "All" margins.
pd.crosstab(index=df["many service calls"], columns=df["churn"], margins=True)

In [None]:
# Bar chart of row counts per "many service calls" flag, split by churn.
# The trailing semicolon (as in the other plotting cells) keeps the repr of the
# returned object out of the cell output.
sns.countplot(x="many service calls", hue="churn", data=df);

In [None]:
# Rows: element-wise AND of the "many service calls" flag and the international
# plan flag; columns: churn.
pd.crosstab(
    index=df["many service calls"] & df["international plan"],
    columns=df["churn"],
)