# Netflix Viewing Activity Analysis

This notebook provides a comprehensive analysis of Netflix viewing patterns, including:

1. Basic viewing statistics
2. Daily and monthly patterns
3. Binge-watching detection
4. Weekly viewing patterns
5. Day of week analysis

Let's start by importing the required libraries.

In [None]:
import csv
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import seaborn as sns
from collections import defaultdict

## Data Loading and Processing

We'll load the viewing activity data from the CSV file and process it to extract dates and durations.

In [None]:
# Location of the Netflix export and the profile whose sessions are analysed.
DATA_PATH = '/Users/emreozkul/Desktop/dsa-project/attention-span-analysis/data/ViewingActivity.csv'
PROFILE_NAME = "C"

# Parallel lists: dates[i] is the calendar day on which session i started and
# durations[i] is the length of that session in minutes.
dates = []
durations = []

# newline='' is what the csv module asks for so that quoted fields containing
# line breaks are parsed correctly. 'utf-8-sig' decodes UTF-8 and drops a
# leading byte-order mark, which would otherwise become part of the first
# header name and break the "Profile Name" lookup.
# NOTE(review): assumes the export is UTF-8 encoded - confirm against the file.
with open(DATA_PATH, 'r', newline='', encoding='utf-8-sig') as csv_file:
    reader = csv.DictReader(csv_file)

    for row in reader:
        if row["Profile Name"] != PROFILE_NAME:
            continue

        # Only the text before the first space of "Start Time" is parsed, so
        # every session is stored at midnight of its start day.
        date = datetime.strptime(row["Start Time"].split(" ")[0], '%Y-%m-%d')

        # Convert duration string (e.g., "1:23:45") to minutes.
        # Unpacking into exactly three parts makes a malformed value fail
        # loudly instead of being silently misread.
        hours, minutes, seconds = (int(part) for part in row["Duration"].split(':'))
        duration_minutes = hours * 60 + minutes + seconds / 60

        dates.append(date)
        durations.append(duration_minutes)

## Basic Statistics

Let's calculate and display some basic statistics about viewing patterns.

In [None]:
# Summarise the loaded sessions: how many there are, their combined length and
# the mean length of a single session (durations are stored in minutes).
sessions = len(dates)
sum_durations = sum(durations)
# When no row matched the profile filter there is nothing to average; report
# 0 rather than failing with ZeroDivisionError.
avg_duration = sum_durations / sessions if sessions else 0.0

print(f"Total Sessions: {sessions}")
print(f"Total Duration (minutes): {sum_durations:.2f}")
print(f"Average Duration per Session (minutes): {avg_duration:.2f}")

## Daily Viewing Patterns

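This plot shows the duration of every individual viewing session against its date. Sessions are not aggregated, so a day with several sessions contributes several points.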

In [None]:
def plot_data():
    """Chart every session's duration (in minutes) against its date.

    Reads the module-level ``dates`` and ``durations`` lists and shows the
    resulting line chart; nothing is returned.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(dates, durations, marker='o')

    # Slant the date labels and print them as full ISO dates.
    fig.autofmt_xdate()
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))

    ax.set(
        xlabel='Date',
        ylabel='Duration (minutes)',
        title='Viewing Duration Over Time',
    )
    ax.grid(True, linestyle='--', alpha=0.7)

    fig.tight_layout()
    plt.show()

# Draw the per-session duration chart for the loaded data.
plot_data()

## Monthly Average Analysis

Let's analyze how the average session duration changes month by month.

In [None]:
# Mean session duration per calendar month.
df = pd.DataFrame({'date': dates, 'duration': durations})
session_month = df['date'].dt.to_period('M')
df['month_year'] = session_month
monthly_avg = df.groupby('month_year')['duration'].mean().reset_index()
# Periods are turned back into timestamps so they can be drawn on a date axis
# and formatted with strftime below.
monthly_avg['month_year'] = monthly_avg['month_year'].dt.to_timestamp()

# Line chart of the monthly means.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    monthly_avg['month_year'],
    monthly_avg['duration'],
    marker='o',
    linewidth=2,
)

fig.autofmt_xdate()
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
ax.set_xlabel('Month')
ax.set_ylabel('Average Duration (minutes)')
ax.set_title('Monthly Average Viewing Duration')
ax.grid(True, linestyle='--', alpha=0.7)
fig.tight_layout()
plt.show()

# Same figures as text, one line per month.
print("\nMonthly Statistics:")
for month_start, mean_minutes in zip(monthly_avg['month_year'], monthly_avg['duration']):
    print(f"{month_start.strftime('%Y-%m')}: {mean_minutes:.2f} minutes")

## Binge-Watching Analysis

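This analysis identifies potential binge-watching by chaining consecutive sessions whose dates are no more than a threshold (3 hours by default) apart. Because only the calendar date of each session is loaded, this effectively groups sessions that fall on the same day.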

In [None]:
def detect_binge_watching(threshold_hours=3):
    """Print runs of consecutive sessions whose dates lie close together.

    The module-level ``dates`` and ``durations`` lists are sorted by date and
    walked once. Neighbouring sessions that are at most ``threshold_hours``
    apart are chained into one binge; a larger gap closes the current chain.
    Every chain of two or more sessions is printed with its first date, its
    number of sessions and its summed duration. Nothing is returned.

    NOTE(review): when ``dates`` carry no time of day (the loading cell of
    this notebook keeps only the date part of "Start Time"), any threshold
    below 24 hours simply groups sessions that share a calendar day.

    Args:
        threshold_hours: Largest gap, in hours, between two neighbouring
            sessions that still keeps them in the same binge.
    """
    max_gap = timedelta(hours=threshold_hours)
    df = pd.DataFrame({'date': dates, 'duration': durations})
    df = df.sort_values('date')

    binge_sessions = []
    current_binge = []  # (date, duration) pairs of the chain being built

    for session_date, minutes in zip(df['date'], df['duration']):
        # A gap larger than the threshold ends the chain in progress.
        if current_binge and session_date - current_binge[-1][0] > max_gap:
            if len(current_binge) > 1:
                binge_sessions.append(current_binge)
            current_binge = []
        current_binge.append((session_date, minutes))

    # The chain that reaches the final session is never followed by a gap,
    # so it has to be flushed here or it would be silently dropped.
    if len(current_binge) > 1:
        binge_sessions.append(current_binge)

    print(f"\nBinge Watching Sessions (>{threshold_hours}h gap):")
    for binge in binge_sessions:
        total_duration = sum(minutes for _, minutes in binge)
        print(f"Date: {binge[0][0].strftime('%Y-%m-%d')}, Episodes: {len(binge)}, Total Duration: {total_duration:.2f} minutes")

# Run the detection with the default threshold of 3 hours.
detect_binge_watching()

## Weekly Viewing Patterns

This analysis shows the total viewing time for each calendar week.

In [None]:
def analyze_weekly_patterns():
    """Plot the total minutes watched in each week.

    Reads the module-level ``dates`` and ``durations`` lists, sums the
    durations per week and shows the totals as a line chart. Nothing is
    returned.
    """
    viewing = pd.DataFrame({'date': dates, 'duration': durations})

    # Tag each session with the start time of the pandas 'W' period it
    # falls into, then add up the minutes per tag.
    viewing['week_start'] = viewing['date'].dt.to_period('W').dt.start_time
    weekly_total = viewing.groupby('week_start')['duration'].sum()

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(weekly_total.index, weekly_total.values, marker='o')

    fig.autofmt_xdate()
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))

    ax.set_title('Weekly Total Viewing Time')
    ax.set_xlabel('Week Starting Date')
    ax.set_ylabel('Total Duration (minutes)')
    ax.grid(True, linestyle='--', alpha=0.7)
    fig.tight_layout()
    plt.show()

# Render the weekly totals chart for the loaded data.
analyze_weekly_patterns()

## Day of Week Analysis

Let's examine viewing patterns across different days of the week.

In [None]:
def analyze_day_of_week():
    """Show and print the mean session duration for each weekday.

    Reads the module-level ``dates`` and ``durations`` lists, computes the
    mean duration and the number of sessions per weekday name, draws the
    means as a bar chart ordered Monday to Sunday and prints one summary
    line per weekday that has sessions. Nothing is returned.
    """
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    viewing = pd.DataFrame({'date': dates, 'duration': durations})
    viewing['day_of_week'] = viewing['date'].dt.day_name()

    # Mean duration and session count per weekday name.
    per_day = viewing.groupby('day_of_week')['duration'].agg(['mean', 'count'])
    day_avg = per_day.reset_index()

    # An ordered categorical makes the sort follow the calendar week instead
    # of the alphabet.
    day_avg['day_of_week'] = pd.Categorical(
        day_avg['day_of_week'],
        categories=weekday_names,
        ordered=True,
    )
    day_avg = day_avg.sort_values('day_of_week')

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.bar(day_avg['day_of_week'], day_avg['mean'])
    ax.set_title('Average Viewing Duration by Day of Week')
    ax.set_xlabel('Day of Week')
    ax.set_ylabel('Average Duration (minutes)')
    plt.xticks(rotation=45)
    ax.grid(True, linestyle='--', alpha=0.7)
    fig.tight_layout()
    plt.show()

    print("\nViewing Statistics by Day of Week:")
    for day, mean_minutes, session_count in zip(
        day_avg['day_of_week'], day_avg['mean'], day_avg['count']
    ):
        print(f"{day}: {mean_minutes:.2f} minutes (Sessions: {session_count})")

# Show the weekday chart and print the per-weekday figures.
analyze_day_of_week()

## Analysis Summary

This notebook provides comprehensive insights into Netflix viewing patterns:

1. **Basic Statistics**: Total viewing time and average session duration
2. **Daily Patterns**: Individual viewing sessions and daily variations
3. **Monthly Trends**: Long-term changes in viewing habits
4. **Binge-Watching**: Identification of extended viewing sessions
5. **Weekly Patterns**: Total viewing time per week
6. **Day of Week Analysis**: Viewing preferences across different days

These insights can be valuable for:
- Understanding personal viewing habits
- Identifying patterns in content consumption
- Analyzing changes in attention span over time