# User Behavior Overview

## Imports

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# Notebook display settings, applied in a single call as (option, value)
# pairs: None lifts the limit on the number of columns shown and on the width
# of a cell's text; turning off expand_frame_repr stops wide frames from
# being wrapped across several lines.
pd.set_option(
    'display.max_columns', None,
    'display.max_colwidth', None,
    "expand_frame_repr", False,
)
# Optional: uncomment to render floats with two decimals.
# pd.set_option('display.float_format', '{:.2f}'.format)

In [None]:
# `os` and `sys` are imported here because this cell is the only place that
# needs them; without these imports the cell fails with a NameError.
import os
import sys

# Put the sibling ../scripts directory on the import path so the helper
# module imported below can be resolved (presumably vis_seaborn lives there —
# confirm against the repository layout). The membership check keeps repeated
# runs of this cell from appending the same entry again.
scripts_dir = os.path.abspath('../scripts')
if scripts_dir not in sys.path:
    sys.path.append(scripts_dir)
from vis_seaborn import *

## Data reading

In [None]:
# Load the cleaned dataset into `df` and print its column/dtype summary.
df = pd.read_csv(filepath_or_buffer="../data/clean_data.csv")
df.info()

## Top 10 handsets used by the customers

In [None]:
# Count how often each handset_type occurs (most frequent first) and show the
# first ten entries.
handset_counts = df['handset_type'].value_counts()
handset_counts.head(10)

## Top 3 handset manufacturers

In [None]:
# Row counts per handset_manufacturer, most frequent first; the leading three
# entries are kept as `top_manufacturers` and displayed.
manufacturer_counts = df['handset_manufacturer'].value_counts()
top_manufacturers = manufacturer_counts.iloc[:3]
top_manufacturers

## Top 5 handsets for each of the top 3 handset manufacturers

In [None]:
# Keep only the rows whose manufacturer is one of `top_manufacturers`, then,
# within each of those manufacturers, list the five most frequent handset
# types together with their counts.
x = df[df["handset_manufacturer"].isin(top_manufacturers.index)]
x.groupby('handset_manufacturer')['handset_type'].apply(
    lambda handsets: handsets.value_counts().head(5)
)

# Task 1.1

## Number of xDR sessions

In [None]:
# Count the non-null bearer_id entries recorded for each msisdn_number and
# show the 20 subscribers with the highest counts.
sessions_per_user = df.groupby('msisdn_number')['bearer_id'].count()
sessions_per_user.nlargest(20)

## Session duration

In [None]:
# Add up the dur_(ms) values of each msisdn_number and show the 20
# subscribers with the largest totals.
duration_per_user = df.groupby('msisdn_number')['dur_(ms)'].sum()
duration_per_user.nlargest(20)


## The total download (DL) and upload (UL) data

In [None]:
# Per-subscriber traffic: sum the download and upload byte counters for each
# msisdn_number, derive their combined volume as `total_data`, and show the
# 20 subscribers with the largest combined volume.
usage = (
    df.groupby('msisdn_number')[['total_dl_(bytes)', 'total_ul_(bytes)']]
    .sum()
    .assign(total_data=lambda t: t['total_dl_(bytes)'] + t['total_ul_(bytes)'])
)
usage.nlargest(20, 'total_data')

## The total data volume (in Bytes) during this session for each application

In [None]:
# For every application, add a column named after it that holds the sum of
# its "<app>_dl_(bytes)" and "<app>_ul_(bytes)" columns. The tuple order
# fixes the order in which the new columns are appended to `df`.
for app in ("social_media", "google", "email", "youtube", "netflix", "gaming", "other"):
    df[app] = df[f"{app}_dl_(bytes)"] + df[f"{app}_ul_(bytes)"]

# Overall volume per row: total download plus total upload.
df['total_data'] = df['total_dl_(bytes)'] + df['total_ul_(bytes)']

In [None]:
# Sum each application's combined (DL + UL) volume per msisdn_number and show
# the 20 subscribers with the largest total_data. 'netflix' is listed with
# the other applications so the table covers every per-application column
# the notebook derives; it was previously left out of the aggregation.
app_columns = ['social_media', 'google', 'email', 'youtube', 'netflix',
               'gaming', 'other', 'total_data']
df.groupby('msisdn_number')[app_columns].sum().nlargest(20, 'total_data')

In [None]:
# Write the frame, including the derived per-application columns, back to
# disk without the index.
# NOTE(review): this is the same path the notebook reads its input from, so
# the source file is overwritten on every run — confirm that is intended.
df.to_csv(path_or_buf='../data/clean_data.csv', index=False)