In [7]:
import pandas as pd

# Switch pandas to copy-on-write mode; this is what silences the
# SettingWithCopyWarning that column assignments would otherwise trigger.
pd.set_option("mode.copy_on_write", True)


You are a Product Analyst working with the Nintendo Switch 2 pre-sales team to analyze regional pre-order patterns and customer segmentation. Your team needs to understand how different demographics influence pre-sale volumes across regions. You will leverage historical pre-sale transaction data to extract meaningful insights that can guide marketing strategies.

In [8]:
# Read the pre-sale transactions from the CSV file into a DataFrame
pre_sale_data = pd.read_csv(filepath_or_buffer='pre_sale_data.csv')

# Print a banner line followed by the loaded DataFrame
print("DataFrame loaded from pre_sale_data.csv:", pre_sale_data, sep="\n")


DataFrame loaded from pre_sale_data.csv:
           region customer_id pre_order_date demographic_group  \
0   North America        C001     2024-07-02             Gamer   
1          Europe        C002     2024-07-03            Casual   
2            Asia        C003     2024-07-04   Tech Enthusiast   
3   Latin America        C004     2024-07-05            Family   
4         Oceania        C005     2024-07-06           Student   
5   North America        C006     2024-07-07             Gamer   
6          Europe        C007     2024-07-08               NaN   
7             NaN        C008     2024-07-09            Casual   
8            Asia        C009     2024-07-10            Family   
9   North America        C010     2024-07-11             Gamer   
10  North America        C010     2024-07-11             Gamer   
11         Europe        C011     2024-07-12           Student   
12           Asia        C012     2024-07-13            Casual   
13  Latin America        C013     2

### Question 1 of 3

What percentage of records have missing values in at least one column? Handle the missing values, so that we have a cleaned dataset to work with.

In [9]:
# Share of records that have a missing value in at least one column.
# A row counts once no matter how many of its columns are empty.
rows_with_missing = pre_sale_data.isna().any(axis="columns")
missing_count = rows_with_missing.sum()
total_count = pre_sale_data.shape[0]
missing_percentage = missing_count / total_count * 100

print(f"Percentage of records with missing values: {missing_percentage:.2f}%")


Percentage of records with missing values: 6.67%


In [10]:
# Keep only the records that are complete in every column.
# NOTE(review): displayed rows 9 and 10 of the raw data agree on every
# visible column (customer C010) - possibly a duplicate record; confirm
# whether duplicates should be removed as well.
cleaned_pre_sale_data = pre_sale_data.dropna(axis="index", how="any")
print(f"Cleaned dataset shape: {cleaned_pre_sale_data.shape}")


Cleaned dataset shape: (56, 5)


### Question 2 of 3

Using the cleaned data, calculate the total pre-sale orders per month for each region and demographic group.

Take your time to think about how you might group and aggregate the data, and how to reshape it for easier analysis.

In [11]:
# Total pre-sale orders per month for each region and demographic group,
# first in long form (monthly_orders) and then reshaped wide (pivot_table).

# Ensure pre_order_date is datetime so the .dt accessor is available
cleaned_pre_sale_data['pre_order_date'] = pd.to_datetime(cleaned_pre_sale_data['pre_order_date'])

# Create a 'month' column (monthly Period, e.g. 2024-07); the forecast cell
# further down groups on this column too
cleaned_pre_sale_data['month'] = cleaned_pre_sale_data['pre_order_date'].dt.to_period('M')

# Group and aggregate: one row per (region, demographic_group, month)
monthly_orders = (
    cleaned_pre_sale_data
    .groupby(['region', 'demographic_group', 'month'])['pre_order_quantity']
    .sum()
    .reset_index()
)

# Optional: Pivot for easier analysis (months as rows, region/demographic
# pairs as columns). aggfunc is set explicitly because pivot_table defaults
# to the mean, which is the wrong statistic for order totals; combinations
# with no orders are shown as 0.
pivot_table = monthly_orders.pivot_table(
    index=['month'],
    columns=['region', 'demographic_group'],
    values='pre_order_quantity',
    aggfunc='sum',
    fill_value=0
)

# Display the results
print("Monthly pre-sale orders per region and demographic group:")
print(monthly_orders.head())

print("\nPivot table for easier analysis:")
print(pivot_table.head())


Monthly pre-sale orders per region and demographic group:
  region demographic_group    month  pre_order_quantity
0   Asia            Casual  2024-07                   4
1   Asia            Casual  2024-08                   8
2   Asia            Family  2024-07                   4
3   Asia             Gamer  2024-07                   2
4   Asia             Gamer  2024-08                   4

Pivot table for easier analysis:
region              Asia                                      Europe         \
demographic_group Casual Family Gamer Student Tech Enthusiast Casual Family   
month                                                                         
2024-07              4.0    4.0   2.0     3.0             1.0    2.0    4.0   
2024-08              8.0    0.0   4.0     3.0             0.0    2.0    4.0   

region                          Latin America  ...                  \
demographic_group Gamer Student        Casual  ... Tech Enthusiast   
month                               

### Question 3 of 3

Predict the total pre-sales quantity for each region for September 2024. Assume that the growth rate from August to September is the same as the growth rate from July to August in each region.

Think about how you might calculate monthly totals, compute growth rates, and then apply those rates to forecast September.

In [12]:
# Forecast September 2024 pre-sales per region, assuming the growth rate
# from August to September equals the growth rate from July to August.

# Calculate monthly total pre-sales per region
region_monthly_totals = (
    cleaned_pre_sale_data
    .groupby(['region', 'month'])['pre_order_quantity']
    .sum()
    .reset_index()
)

# Filter for July and August 2024 (months compared as strings like '2024-07')
region_monthly_totals['month'] = region_monthly_totals['month'].astype(str)
july_totals = region_monthly_totals[region_monthly_totals['month'] == '2024-07']
august_totals = region_monthly_totals[region_monthly_totals['month'] == '2024-08']

# Merge July and August totals for growth calculation.
# An outer merge keeps a region that has orders in only one of the two
# months (its other total is NaN) instead of silently dropping it.
merged = pd.merge(
    july_totals,
    august_totals,
    on='region',
    how='outer',
    suffixes=('_july', '_august')
)

# Calculate growth rate per region.
# A July total of 0 has no defined growth rate, so it is masked to NaN
# rather than producing inf from a division by zero.
july_quantity = merged['pre_order_quantity_july']
merged['growth_rate'] = (
    (merged['pre_order_quantity_august'] - july_quantity)
    / july_quantity.where(july_quantity != 0)
)

# Display growth rates
print("Growth rates per region:")
print(merged[['region', 'growth_rate']])

# Predict September totals.
# The nullable 'Int64' dtype lets a region without a computable growth rate
# show <NA> instead of making the integer cast raise.
merged['pre_order_quantity_september'] = (
    merged['pre_order_quantity_august'] * (1 + merged['growth_rate'])
).round().astype('Int64')

# Prepare final output
september_forecast = merged[['region', 'pre_order_quantity_september']]

# Display the forecast for September
print("\nPredicted total pre-sales quantity for each region for September 2024:")
print(september_forecast)


Growth rates per region:
          region  growth_rate
0           Asia     0.071429
1         Europe    -0.333333
2  Latin America     0.076923
3  North America     0.461538
4        Oceania    -0.285714

Predicted total pre-sales quantity for each region for September 2024:
          region  pre_order_quantity_september
0           Asia                            16
1         Europe                             7
2  Latin America                            15
3  North America                            28
4        Oceania                             7
