In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display

# Main table. Several of its columns hold numeric IDs that are decoded to
# names further down.
df = pd.read_csv('data/data (1).csv')

# Lookup tables; each is read here for its `id` and `name` columns.
countries_df = pd.read_csv('data/countries.csv')
devices_df = pd.read_csv('data/devices.csv')
genders_df = pd.read_csv('data/genders.csv')
genres_df = pd.read_csv('data/genres.csv')
subscription_types_df = pd.read_csv('data/subscription_types.csv')

# One {id: name} dictionary per lookup table.
id_to_country = countries_df.set_index('id')['name'].to_dict()
id_to_device = devices_df.set_index('id')['name'].to_dict()
id_to_gender = genders_df.set_index('id')['name'].to_dict()
id_to_genre = genres_df.set_index('id')['name'].to_dict()
id_to_subscription_type = subscription_types_df.set_index('id')['name'].to_dict()

# Decode each ID column of the main table in place. Series.map with a dict
# turns any ID that has no entry in the dictionary into NaN.
for _column, _id_to_name in (
    ('country', id_to_country),
    ('device', id_to_device),
    ('gender', id_to_gender),
    ('preferred_genre', id_to_genre),
    ('subscription_type', id_to_subscription_type),
):
    df[_column] = df[_column].map(_id_to_name)


# Parse the date columns so the `.dt` accessors used below are available.
df['birth_date'] = pd.to_datetime(df['birth_date'])
df['join_date'] = pd.to_datetime(df['join_date'])
df['last_payment_date'] = pd.to_datetime(df['last_payment_date'])

# Age in completed years as of the reference date.
# Dividing an elapsed-day count by 365 ignores leap days, which credits a
# birthday about two weeks early for a 60-year-old. Comparing calendar fields
# avoids that: take the difference in years and subtract one when the birthday
# has not yet occurred in the reference year.
age_reference_date = pd.to_datetime('2023-06-30')
_birth = df['birth_date']
_birthday_pending = (
    (_birth.dt.month > age_reference_date.month)
    | (
        (_birth.dt.month == age_reference_date.month)
        & (_birth.dt.day > age_reference_date.day)
    )
)
# Rows with a missing birth date keep a missing (NaN) age, as before.
df['age'] = age_reference_date.year - _birth.dt.year - _birthday_pending.astype(int)

# Monthly period (year + month) of the last payment, used to group rows by month.
df['last_payment_year_month'] = df['last_payment_date'].dt.to_period('M')

# Function to plot
def plot(option):
    """Draw the chart named by ``option`` on a new 10x6 figure and show it.

    Reads the module-level ``df``. Depending on ``option`` the chart is a
    count plot of one column, a 30-bin histogram of one column, or a line
    chart of row counts per ``last_payment_year_month`` restricted to rows
    whose ``last_payment_date`` is before 2023-06-01. A label that matches
    none of these produces an empty figure that still carries the title.

    Args:
        option: Chart label; it is also used verbatim as the figure title.
    """
    # Label -> column shown as a count plot.
    count_columns = {
        'Distribution of Users by Country': 'country',
        'Distribution of Users by Gender': 'gender',
        'Distribution of Users by Device': 'device',
        'Distribution of Users by Subscription Type': 'subscription_type',
    }
    # Label -> column shown as a histogram.
    histogram_columns = {
        'Distribution of Average Watch Time': 'average_watch_time',
        'Distribution of Age': 'age',
    }

    plt.figure(figsize=(10, 6))

    if option in count_columns:
        sns.countplot(data=df, x=count_columns[option])
    elif option in histogram_columns:
        sns.histplot(data=df, x=histogram_columns[option], bins=30)
    elif option == 'Number of Cancellations Over Time':
        # Only rows whose last payment precedes the cutoff are counted.
        cutoff = pd.to_datetime('2023-06-01')
        before_cutoff = df[df['last_payment_date'] < cutoff]
        monthly_counts = before_cutoff.groupby('last_payment_year_month').size()
        monthly_counts.plot(kind='line')
        plt.xlabel('Last Payment Year and Month')
        plt.ylabel('Number of Cancellations')

    plt.title(option)
    plt.show()

# Labels offered in the dropdown; each is passed unchanged to plot().
plot_options = [
    'Distribution of Users by Country',
    'Distribution of Users by Gender',
    'Distribution of Users by Device',
    'Distribution of Users by Subscription Type',
    'Distribution of Average Watch Time',
    'Distribution of Age',
    'Number of Cancellations Over Time',
]

# Dropdown widget used to pick which chart to draw.
dropdown = widgets.Dropdown(options=plot_options, description='Select plot:')

# Call plot() with the selected label whenever the dropdown value changes.
widgets.interact(plot, option=dropdown)


interactive(children=(Dropdown(description='Select plot:', options=('Distribution of Users by Country', 'Distr…

<function __main__.plot(option)>