# **1. Importing libraries**

In [1]:
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans

# Make "plotly_white" the default Plotly template so figures created in later
# cells pick it up without passing `template=` each time
pio.templates.default = "plotly_white"

# **2. Loading data and descriptive statistics**

In [3]:
# Read the user-behaviour dataset (path is relative to the working directory)
data = pd.read_csv("userbehaviour.csv")

# Preview the top of the frame
print("First few rows of the data:")
print(data.head())

# Report mean / maximum / minimum for each key variable; one loop replaces
# the two hand-written sections and emits the same text
for column in ("Average Screen Time", "Average Spent on App (INR)"):
    series = data[column]
    print(f"\nDescriptive statistics for '{column}':")
    print(f" - Mean: {series.mean()}")
    print(f" - Maximum: {series.max()}")
    print(f" - Minimum: {series.min()}")

First few rows of the data:
   userid  Average Screen Time  Average Spent on App (INR)  Left Review  \
0    1001                 17.0                       634.0            1   
1    1002                  0.0                        54.0            0   
2    1003                 37.0                       207.0            0   
3    1004                 32.0                       445.0            1   
4    1005                 45.0                       427.0            1   

   Ratings  New Password Request  Last Visited Minutes       Status  
0        9                     7                  2990    Installed  
1        4                     8                 24008  Uninstalled  
2        8                     5                   971    Installed  
3        6                     2                   799    Installed  
4        5                     6                  3668    Installed  

Descriptive statistics for 'Average Screen Time':
 - Mean: 24.39039039039039
 - Maximum: 50.0
 - Min

# **3. Creating scatter plots**

In [4]:
# Two scatter plots against screen time. They share every setting except the
# y-axis column (which also drives the marker size) and the title:
#   1. screen time vs. spending capacity
#   2. screen time vs. user ratings
screen_time_plots = [
    ("Average Spent on App (INR)",
     "Relationship Between Spending Capacity and Screentime"),
    ("Ratings",
     "Relationship Between Ratings and Screentime"),
]

for y_column, plot_title in screen_time_plots:
    # Colour points by install status and request an OLS trendline
    figure = px.scatter(
        data,
        x="Average Screen Time",
        y=y_column,
        size=y_column,
        color="Status",
        title=plot_title,
        trendline="ols",
    )
    figure.show()

# **4. Data segmentation**

In [6]:
# Selecting variables for segmentation
segmentation_features = [
    "Average Screen Time",
    "Left Review",
    "Ratings",
    "Last Visited Minutes",
    "Average Spent on App (INR)",
    "New Password Request",
]
clustering_data = data[segmentation_features]

# Normalizing data: MinMaxScaler rescales each feature to its default [0, 1]
# range so that columns with large magnitudes (e.g. "Last Visited Minutes")
# do not dominate the distances K-means computes
scaler = MinMaxScaler()
clustering_data_scaled = scaler.fit_transform(clustering_data)

# Running K-means clustering with three clusters, setting n_init=10 explicitly.
# random_state is fixed so the centroid initialisation is repeatable across
# kernel restarts; without it the cluster numbering could differ from run to
# run while the name mapping at the end of this cell assumes fixed ids.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
clusters = kmeans.fit_predict(clustering_data_scaled)
data["Segments"] = clusters

# Displaying the first 10 rows with (still numeric) segment assignments
print(data.head(10))

# Counting the number of users in each segment
print(data["Segments"].value_counts())

# Mapping segment numbers to descriptive names.
# NOTE(review): K-means cluster ids carry no inherent meaning -- confirm
# against the per-cluster feature profiles that 0/1/2 really correspond to
# these names for the seeded run.
data["Segments"] = data["Segments"].map({
    0: "Retained",
    1: "Churn",
    2: "Needs Attention",
})

   userid  Average Screen Time  Average Spent on App (INR)  Left Review  \
0    1001                 17.0                       634.0            1   
1    1002                  0.0                        54.0            0   
2    1003                 37.0                       207.0            0   
3    1004                 32.0                       445.0            1   
4    1005                 45.0                       427.0            1   
5    1006                 28.0                       599.0            0   
6    1007                 49.0                       887.0            1   
7    1008                  8.0                        31.0            0   
8    1009                 28.0                       741.0            1   
9    1010                 28.0                       524.0            1   

   Ratings  New Password Request  Last Visited Minutes       Status  Segments  
0        9                     7                  2990    Installed         1  
1        4    

# **5. Creating scatter plot with segments**

In [8]:
# Creating a scatter plot showing the relationship between last visited minutes
# and average spending, with one trace (and therefore one colour / legend
# entry) per segment
PLOT = go.Figure()
for segment in data["Segments"].unique():
    # Filter once per segment and reuse the subset for both axes
    segment_rows = data[data["Segments"] == segment]
    PLOT.add_trace(go.Scatter(
        x=segment_rows["Last Visited Minutes"],
        y=segment_rows["Average Spent on App (INR)"],
        mode="markers",
        marker_size=6,
        marker_line_width=1,
        name=str(segment),
    ))
PLOT.update_traces(hovertemplate='Last Visited Minutes: %{x} <br>Average Spent on App (INR): %{y}')

# Updating the layout of the scatter plot.
# The axis titles and their black font are set on the 2D `xaxis` / `yaxis`:
# `layout.scene` only configures 3D axes, so styling placed there is not
# applied to go.Scatter traces. The nested `title=dict(text=..., font=...)`
# form replaces the deprecated `titlefont_color` shorthand.
# update_layout returns the figure, so leaving it as the last expression of
# the cell renders the plot.
PLOT.update_layout(
    width=800,
    height=800,
    autosize=True,
    showlegend=True,
    title='Relationship between Last Visited Minutes and Average Spending, with Segments',
    xaxis=dict(title=dict(text='Last Visited Minutes', font=dict(color='black'))),
    yaxis=dict(title=dict(text='Average Spent on App (INR)', font=dict(color='black'))),
)