#### Importing necessary packages

In [1]:
import pandas as pd
from privacy_utils.anonymizer import anonymize

#### Loading the data from the xml file and anonymizing it for privacy concerns

In [2]:
df = anonymize("data/calls-20250902222444.xml")
df.head()

Unnamed: 0,contact,call_type,duration,date,timestamp
0,26140da466,missed,0,"Jan 1, 2018 06:31:46",1514777506484
1,be9b9c501b,missed,0,"Jan 1, 2018 10:30:22",1514791822769
2,21e87df9bc,outgoing,53,"Aug 3, 2020 15:02:32",1596456152643
3,21e87df9bc,outgoing,166,"Aug 3, 2020 15:03:46",1596456226439
4,239595015e,incoming,1067,"Aug 6, 2020 17:44:52",1596725092157


## 1. Descriptive Analysis (Basic Insights)
Under this subset, we will mainly focus on two things;
- Call volume
- Call durations

### Call Volume Patterns

In [3]:
# Preparation of time columns
descriptive_df = df.copy()

descriptive_df["datetime"] = pd.to_datetime(descriptive_df["timestamp"],  unit="ms", utc=True)
descriptive_df["hour"] = descriptive_df["datetime"].dt.hour
descriptive_df["day_of_week"] = descriptive_df["datetime"].dt.day_name()
descriptive_df["month"] = descriptive_df["datetime"].dt.to_period("M")

# Dropping the date and timestamp columns bcz we nolonger need them as of now
descriptive_df.drop(columns=["date", "timestamp"], inplace=True)

descriptive_df.head()

  descriptive_df["month"] = descriptive_df["datetime"].dt.to_period("M")


Unnamed: 0,contact,call_type,duration,datetime,hour,day_of_week,month
0,26140da466,missed,0,2018-01-01 03:31:46.484000+00:00,3,Monday,2018-01
1,be9b9c501b,missed,0,2018-01-01 07:30:22.769000+00:00,7,Monday,2018-01
2,21e87df9bc,outgoing,53,2020-08-03 12:02:32.643000+00:00,12,Monday,2020-08
3,21e87df9bc,outgoing,166,2020-08-03 12:03:46.439000+00:00,12,Monday,2020-08
4,239595015e,incoming,1067,2020-08-06 14:44:52.157000+00:00,14,Thursday,2020-08


##### i. Total Number of Calls

In [4]:
total_calls = len(df)
total_calls

1303

##### ii. Calls per Category

In [5]:
calls_per_category = df[["call_type", "contact"]].groupby('call_type', as_index=False).count()
calls_per_category

Unnamed: 0,call_type,contact
0,incoming,444
1,missed,180
2,outgoing,667
3,voicemail,12


iii. Calls per Day