# Netflix Data Analysis
Analysis of the data downloaded from a Netflix account.


Here I'll be using my own personal Netflix data.
[Visit this page](https://www.netflix.com/account/getmyinfo) to get your Netflix data if you have an account.

In [None]:
import pandas as pd

# Load the viewing-activity CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer='ViewingActivity.csv')

In [None]:
# Check the size of the DataFrame as (rows, columns)
df.shape

In [None]:
# Preview the first row
df.head(1)

In [None]:
# Remove the columns that are not used in this analysis
unwanted_columns = [
    'Attributes',
    'Supplemental Video Type',
    'Device Type',
    'Bookmark',
    'Latest Bookmark',
    'Country',
]
df = df.drop(columns=unwanted_columns)

df.head(1)

In [None]:
# Check the data type of each column
df.dtypes

In [None]:
# Parse the Start Time values into timezone-aware (UTC) datetimes
start_time_utc = pd.to_datetime(df['Start Time'], utc=True)
df['Start Time'] = start_time_utc

df.dtypes

In [None]:
# Convert Start Time from UTC to Indian time.
# DataFrame.tz_convert works on the index, so Start Time is made the index
# for the conversion and then turned back into a regular column.
df = (
    df.set_index('Start Time')
    .tz_convert('Asia/Kolkata')
    .reset_index()
)

df.head(1)

In [None]:
# Parse the Duration values into timedeltas
duration = pd.to_timedelta(df['Duration'])
df['Duration'] = duration
df.dtypes

# Viewing data of a specific program
Here I analyse the viewing data for *"Formula 1: Drive to Survive"*.

In [None]:
# Separate my profile's ("Kal El") viewing data.
# na=False treats a missing profile name as "no match" so the mask stays boolean.
my_profilename = df[df['Profile Name'].str.contains('Kal El', regex=False, na=False)]

# Keep the Formula 1 entries by checking whether the title contains the keyword.
# The search runs on my_profilename, not df, so that views from other
# profiles on the account are not counted as mine.
formula1 = my_profilename[
    my_profilename['Title'].str.contains('Formula 1', regex=False, na=False)
]

formula1

In [None]:
# Keep only the views that lasted longer than one minute
one_minute = pd.Timedelta(minutes=1)
longer_than_a_minute = formula1['Duration'] > one_minute
formula1 = formula1[longer_than_a_minute]

formula1.head(20)

In [None]:
# Add two helper columns derived from Start Time:
#   weekday - on which day of the week did I watch the episode? (0 = Monday ... 6 = Sunday)
#   hour    - during which hour of the day did I start the episode?
# formula1 is a filtered subset of df; assign() builds a new DataFrame instead
# of writing into that subset, which avoids pandas' SettingWithCopyWarning.
formula1 = formula1.assign(
    weekday=formula1['Start Time'].dt.weekday,
    hour=formula1['Start Time'].dt.hour,
)

# check to make sure the columns were added correctly
formula1.head(1)

In [None]:
%matplotlib inline

import matplotlib

In [None]:
# Make weekday an ordered categorical covering all seven days (0-6) so the
# days are counted and plotted in calendar order, including days with no views.
# assign() returns a new DataFrame rather than writing into a filtered subset
# of df, which avoids pandas' SettingWithCopyWarning.
formula1 = formula1.assign(
    weekday=pd.Categorical(formula1['weekday'], categories=list(range(7)), ordered=True)
)
formula1.head(5)

In [None]:
# Count how many views fall on each weekday
weekday_column = formula1['weekday']
formula1_by_day = weekday_column.value_counts()
formula1_by_day

In [None]:
# Order the counts by weekday (the index) instead of by count
formula1_by_day = formula1_by_day.sort_index(ascending=True)
formula1_by_day

In [None]:
# Use a larger font so the chart is easier to read
matplotlib.rcParams['font.size'] = 22

# Bar chart of the number of views per weekday
formula1_by_day.plot.bar(figsize=(20, 10), title='Formula 1 Episodes Watched by Day')

In [None]:
# Make hour an ordered categorical covering 0-23 so the hours are plotted in
# clock order, including hours with no views.
# assign() returns a new DataFrame rather than writing into a filtered subset
# of df, which avoids pandas' SettingWithCopyWarning.
formula1 = formula1.assign(
    hour=pd.Categorical(formula1['hour'], categories=list(range(24)), ordered=True)
)

# Count the views per starting hour and order the counts by hour
formula1_by_hour = formula1['hour'].value_counts().sort_index()

formula1_by_hour.plot(kind='bar', figsize=(20, 10), title="Formula1 episodes watched by hour")

# Netflix Viewing Activity During Covid Times
Here we look at the Netflix data to analyse usage during Covid times (from December 2019 to May 2021).

In [None]:
# Remove the Profile Name column; this part looks at all viewing activity
covidf = df.drop(columns=['Profile Name'])

covidf

In [None]:
# Reduce each Start Time to its calendar date and store it as a datetime column
calendar_dates = covidf['Start Time'].dt.date
covidf['Date'] = pd.to_datetime(calendar_dates)

covidf

In [None]:
# Keep the rows dated after start_date and up to (and including) end_date
start_date = '2019-12-19'
end_date = '2021-05-30'

after_start = covidf['Date'] > start_date
until_end = covidf['Date'] <= end_date
mask = after_start & until_end

covidf = covidf.loc[mask]
covidf

In [None]:
# Count the rows per date, i.e. the number of viewing entries on each day
covi = covidf.groupby('Date').size()
covi.head(1)

In [None]:
# Line chart of the number of viewing entries per day
covi.plot.line(figsize=(30, 10), title='Viewing activity per day(during covid time)')