# Day 9: Instagram Stories Daily User Creation Patterns

You are a Product Analyst on the Instagram Stories team investigating story creation patterns. The team wants to understand the distribution of stories created by users daily. You will analyze user storytelling behavior to optimize engagement strategies.

In [2]:
import pandas as pd
import numpy as np

stories_data_data = [
  {
    "user_id": "user_001",
    "story_date": "2024-07-03",
    "story_count": 3
  },
  {
    "user_id": "user_001",
    "story_date": "2024-07-03",
    "story_count": 3
  },
  {
    "user_id": "user_001",
    "story_date": "2024-08-15",
    "story_count": 5
  },
  {
    "user_id": "user_001",
    "story_date": "2024-09-10",
    "story_count": 0
  },
  {
    "user_id": "user_001",
    "story_date": "2024-10-05",
    "story_count": 20
  },
  {
    "user_id": "user_001",
    "story_date": "07/15/2024",
    "story_count": 2
  },
  {
    "user_id": "user_002",
    "story_date": "2024-07-03",
    "story_count": 4
  },
  {
    "user_id": " user_002",
    "story_date": "2024-07-04",
    "story_count": 3
  },
  {
    "user_id": "user_002",
    "story_date": None,
    "story_count": 6
  },
  {
    "user_id": "user_002",
    "story_date": "2024-12-25",
    "story_count": 1
  },
  {
    "user_id": "user_002",
    "story_date": "2025-01-15",
    "story_count": 7
  },
  {
    "user_id": "user_002",
    "story_date": "2025-06-29",
    "story_count": 10
  },
  {
    "user_id": "user_003",
    "story_date": "2024-07-10",
    "story_count": 2
  },
  {
    "user_id": "user_003",
    "story_date": "2024-08-20",
    "story_count": 8
  },
  {
    "user_id": "user_003",
    "story_date": "2024-08-20",
    "story_count": 8
  },
  {
    "user_id": "user_003",
    "story_date": "2025-03-11",
    "story_count": 5
  },
  {
    "user_id": None,
    "story_date": "2025-03-12",
    "story_count": 3
  },
  {
    "user_id": "USER_003",
    "story_date": "2025-04-01",
    "story_count": 4
  },
  {
    "user_id": "user_004",
    "story_date": "2024-07-15",
    "story_count": 6
  },
  {
    "user_id": "user_004",
    "story_date": "2024-09-30",
    "story_count": 7
  },
  {
    "user_id": "user_004",
    "story_date": "2024/10/10",
    "story_count": 4
  },
  {
    "user_id": "user_004",
    "story_date": "2024-11-11",
    "story_count": 3
  },
  {
    "user_id": "user_004",
    "story_date": "2025-02-28",
    "story_count": 12
  },
  {
    "user_id": "user_004",
    "story_date": "2025-03-01",
    "story_count": 0
  },
  {
    "user_id": "user_005",
    "story_date": "2024-08-01",
    "story_count": 1
  },
  {
    "user_id": "user_005",
    "story_date": "2024-08-02",
    "story_count": 2
  },
  {
    "user_id": "user_005",
    "story_date": "2024-08-03",
    "story_count": 3
  },
  {
    "user_id": "user_005",
    "story_date": "2024-08-04",
    "story_count": 4
  },
  {
    "user_id": "user_005",
    "story_date": "2024-08-05",
    "story_count": None
  },
  {
    "user_id": "user_005",
    "story_date": "2024-08-06",
    "story_count": 5
  },
  {
    "user_id": "user_006",
    "story_date": "2024-09-01",
    "story_count": 9
  },
  {
    "user_id": "user_006",
    "story_date": "2024-09-02",
    "story_count": 10
  },
  {
    "user_id": "user_006",
    "story_date": "2024-09-03",
    "story_count": 9
  },
  {
    "user_id": "user_006",
    "story_date": "2024-09-04",
    "story_count": 50
  },
  {
    "user_id": "user_006",
    "story_date": "2024-09-05",
    "story_count": 8
  },
  {
    "user_id": "user_006",
    "story_date": None,
    "story_count": 7
  },
  {
    "user_id": "user_007",
    "story_date": "2024-10-10",
    "story_count": 4
  },
  {
    "user_id": "user_007",
    "story_date": "2024-10-11",
    "story_count": 4
  },
  {
    "user_id": "user_007",
    "story_date": "2024-10-12",
    "story_count": 4
  },
  {
    "user_id": "user_007",
    "story_date": "2024-10-13",
    "story_count": 3
  },
  {
    "user_id": "user_007",
    "story_date": "2024-10-14",
    "story_count": 2
  },
  {
    "user_id": "user_007",
    "story_date": "2024-10-15",
    "story_count": 1
  },
  {
    "user_id": "user_008",
    "story_date": "2025-01-01",
    "story_count": 11
  },
  {
    "user_id": "user_008",
    "story_date": "2025-01-02",
    "story_count": 12
  },
  {
    "user_id": "user_008",
    "story_date": "2025-01-03",
    "story_count": 13
  },
  {
    "user_id": "user_008",
    "story_date": "2025-01-04",
    "story_count": 14
  },
  {
    "user_id": "user_008",
    "story_date": "2025-01-05",
    "story_count": 15
  },
  {
    "user_id": "user_008",
    "story_date": "2025-01-06",
    "story_count": 0
  },
  {
    "user_id": "user_009",
    "story_date": "2024-12-01",
    "story_count": 1
  },
  {
    "user_id": "user_009",
    "story_date": "2024-12-02",
    "story_count": 2
  },
  {
    "user_id": "user_009",
    "story_date": "2024-12-03",
    "story_count": 3
  },
  {
    "user_id": "user_009",
    "story_date": "2024-12-04",
    "story_count": 4
  },
  {
    "user_id": "user_009",
    "story_date": "2024-12-05",
    "story_count": 5
  },
  {
    "user_id": "user_009",
    "story_date": "invalid_date",
    "story_count": 6
  },
  {
    "user_id": "user_010",
    "story_date": "2025-03-15",
    "story_count": 7
  },
  {
    "user_id": "user_010",
    "story_date": "2025-03-16",
    "story_count": 8
  },
  {
    "user_id": "user_010",
    "story_date": "2025-03-17",
    "story_count": 9
  },
  {
    "user_id": "user_010",
    "story_date": "2025-03-18",
    "story_count": 10
  },
  {
    "user_id": "user_010",
    "story_date": "2025-03-19",
    "story_count": 11
  },
  {
    "user_id": "user_010",
    "story_date": "2025-03-20",
    "story_count": 12
  }
]
stories_data = pd.DataFrame(stories_data_data)


## Question 1

Take a look at the data in the story_date column. Correct any data type inconsistencies in that column.

In [3]:
# Convert story_date to datetime, coercing errors to NaT for invalid entries
stories_data['story_date'] = pd.to_datetime(stories_data['story_date'], errors='coerce')

# Check if any values could not be converted
invalid_dates = stories_data['story_date'].isna().sum()

print(f"Number of invalid or missing dates: {invalid_dates}")

Number of invalid or missing dates: 5


## Question 2

Calculate the 25th, 50th, and 75th percentiles of the number of stories created per user per day.

In [4]:
# Ensure data is grouped by user_id and story_date
stories_per_user_day = (
    stories_data
    .groupby(['user_id', 'story_date'])['story_count']
    .sum()
    .reset_index()
)

# Calculate percentiles
percentiles = stories_per_user_day['story_count'].quantile([0.25, 0.50, 0.75])

print(percentiles)

0.25     3.0
0.50     5.0
0.75    10.0
Name: story_count, dtype: float64


## Question 3

What percentage of users have had at least one day, where they posted more than 10 stories on that day?

In [5]:
# Group by user and day to get total stories per user per day
stories_per_user_day = (
    stories_data
    .groupby(['user_id', 'story_date'])['story_count']
    .sum()
    .reset_index()
)

# Identify users who had >10 stories at least once
users_with_over_10 = stories_per_user_day.loc[stories_per_user_day['story_count'] > 10, 'user_id'].unique()

# Calculate percentage
percentage_users = (len(users_with_over_10) / stories_data['user_id'].nunique()) * 100

print(percentage_users)

50.0


Made with ❤️ by [Interview Master](https://www.interviewmaster.ai)