# In [None]:
# 02_explore_rpe_revenue.ipynb

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
import importlib
import utils.revenue_data_preprocessing as rdp

# Reload the preprocessing module so the most recent version of its code is
# the one used below (the module was already imported above as `rdp`).
importlib.reload(rdp)

# Load the raw RPE revenue data from the CSV export.
rpe_revenue_df = pd.read_csv('../raw_data/global_rpe_revenue.csv')

# Display basic information about the dataset.
# DataFrame.info() writes its report straight to stdout and returns None, so
# it is called directly; wrapping it in print() would emit a stray "None".
print("Original Dataset Info:")
rpe_revenue_df.info()

# Apply the preprocessing function. Per the original note it now includes a
# WLES filter; that logic lives in utils.revenue_data_preprocessing and is
# not visible here -- TODO confirm against the module.
rpe_revenue_df = rdp.preprocess_rpe_data(rpe_revenue_df)

print("\nPreprocessed Dataset Info (WLES only):")
rpe_revenue_df.info()

# Sum RPE Revenue for every (Geounit, month) pair. pd.Grouper bins the
# 'Month Date' column at month-end frequency.
# NOTE(review): newer pandas releases deprecate the 'M' alias in favour of
# 'ME'; confirm the pinned pandas version before switching.
monthly_revenue = (
    rpe_revenue_df
    .groupby(
        ['SL Geounit (Code)', pd.Grouper(key='Month Date', freq='M')]
    )['RPE Revenue']
    .sum()
    .reset_index()
)

# Quick sanity check of the aggregated rows.
print("\nFirst few rows of grouped data:")
print(monthly_revenue.head())

# Reshape to wide form (one row per month, one column per Geounit) so each
# Geounit can be drawn as its own line.
pivot_revenue = monthly_revenue.pivot(
    index='Month Date',
    columns='SL Geounit (Code)',
    values='RPE Revenue',
)

# Draw one line per Geounit column of the pivoted table on a single figure.
plt.figure(figsize=(15, 8))
for geounit, revenue_series in pivot_revenue.items():
    plt.plot(revenue_series.index, revenue_series, label=geounit)

# Axis labels and title.
plt.xlabel('Date')
plt.ylabel('RPE Revenue')
plt.title('Monthly WLES RPE Revenue by Geounit')

# Anchor the legend's upper-left corner just past the right edge of the axes
# so it does not cover the lines.
plt.legend(title='Geounit', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

# Total revenue per Geounit across all rows of monthly_revenue, largest first.
total_revenue_by_geounit = (
    monthly_revenue
    .groupby('SL Geounit (Code)')['RPE Revenue']
    .sum()
    .sort_values(ascending=False)
)
print("\nTotal WLES Revenue by Geounit:")
print(total_revenue_by_geounit)

# Descriptive statistics of the monthly aggregates.
print("\nSummary Statistics:")
print(monthly_revenue.describe())

# Per-column count of missing entries in the monthly aggregates.
print("\nMissing Values:")
print(monthly_revenue.isnull().sum())

# Further analysis can be added here based on specific requirements and insights gained