In [None]:


# --- Importing Github push module and other dependencies ---

!git clone https://github.com/blainehodder/WCSB_Supply_Demand.git
import sys
sys.path.append("/content/WCSB_Supply_Demand")

from utils.github_commit import push_df_to_github

import pandas as pd
import requests
from io import BytesIO
""

# --- Set Github Token globally for repo push ---
import os
from getpass import getpass

# Never paste the token into the notebook source: a saved or shared copy of
# the notebook would leak it. Prompt for it instead (the input is not echoed)
# and keep it only in this process's environment. A value that is already set
# is reused so re-running the cell does not prompt again.
if not os.environ.get("GITHUB_TOKEN"):
    os.environ["GITHUB_TOKEN"] = getpass("GitHub token: ")

# --- CONFIG ---
years = list(range(2010, 2026))
base_url = "https://raw.githubusercontent.com/blainehodder/WCSB_Supply_Demand/main/raw_data/st53/ST53_{}.xls"
REQUEST_TIMEOUT_S = 60  # without a timeout a stalled download would block the cell indefinitely

# Columns carried through unchanged, and the monthly columns that get unpivoted.
id_cols = ['Operator', 'Scheme Name', 'Area', 'Approval Number', 'Recovery Method']
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
month_map = {name: number for number, name in enumerate(months, start=1)}


def tidy_st53_sheet(raw_sheet, year):
    """Reshape one year's raw BITUMEN sheet into long, one-row-per-month form.

    Args:
        raw_sheet: DataFrame read with ``header=None``. The first three rows
            are discarded and the fourth row supplies the column labels, which
            must include every name in ``id_cols`` and ``months``.
        year: Calendar year the sheet covers; combined with the month column
            to build the ``Date`` value (first day of each month).

    Returns:
        DataFrame with columns ``Date``, the ``id_cols``, and
        ``Bitumen Production`` (numeric). Rows lacking an Operator or Scheme
        Name, and rows whose production value is missing or non-numeric, are
        dropped.

    Raises:
        KeyError: if an expected column label is absent from the sheet.
    """
    # Skip the three leading rows, then promote the next row to column labels.
    body = raw_sheet.iloc[3:].reset_index(drop=True)
    body.columns = body.iloc[0].tolist()
    body = body.drop(index=0).reset_index(drop=True)

    # Rows without both identifiers are not scheme records.
    body = body.dropna(subset=["Operator", "Scheme Name"])

    melted = body.melt(id_vars=id_cols, value_vars=months,
                       var_name='Month', value_name='Bitumen Production')
    melted["Year"] = year
    melted["Month_Num"] = melted["Month"].map(month_map)
    melted["Date"] = pd.to_datetime(dict(year=melted["Year"], month=melted["Month_Num"], day=1))

    cleaned = melted[['Date'] + id_cols + ['Bitumen Production']].copy()
    # Non-numeric cells become NaN and are then removed.
    cleaned['Bitumen Production'] = pd.to_numeric(cleaned['Bitumen Production'], errors='coerce')
    return cleaned.dropna(subset=["Bitumen Production"])


all_data = []
failed_years = []  # years skipped because the download or the parsing failed

for year in years:
    try:
        print(f"Processing {year}...")
        response = requests.get(base_url.format(year), timeout=REQUEST_TIMEOUT_S)
        response.raise_for_status()

        raw_sheet = pd.read_excel(BytesIO(response.content), sheet_name='BITUMEN', header=None)
        all_data.append(tidy_st53_sheet(raw_sheet, year))

    except Exception as e:
        # Best effort: one bad year must not discard the others, but keep a
        # record so the gap is visible after the loop finishes.
        print(f"❌ Failed to process {year}: {e}")
        failed_years.append(year)

if failed_years:
    print(f"Skipped years: {failed_years}")

# Combine and preview
# The per-year loop swallows every failure, so all_data can legitimately be
# empty (e.g. no network). Fail here with an actionable message instead of
# pandas' generic "No objects to concatenate".
if not all_data:
    raise ValueError("No ST53 years were processed successfully; nothing to combine.")

final_df = pd.concat(all_data, ignore_index=True)
final_df.head(100)


# Push to GitHub
# Destination of the upload: account, repository, and file path within it.
push_target = {
    "user": "blainehodder",
    "repo": "WCSB_Supply_Demand",
    "path": "clean_data/st53/st53_cleaned.csv",
}

# Hand the combined table to the project's push helper.
push_df_to_github(df=final_df, commit_message="Upload cleaned ST53 file", **push_target)



fatal: destination path 'WCSB_Supply_Demand' already exists and is not an empty directory.
Processing 2010...
Processing 2011...
Processing 2012...
Processing 2013...
Processing 2014...
Processing 2015...
Processing 2016...
Processing 2017...
Processing 2018...
Processing 2019...
Processing 2020...
Processing 2021...
Processing 2022...
Processing 2023...
Processing 2024...
Processing 2025...


Unnamed: 0,Date,Operator,Scheme Name,Area,Approval Number,Recovery Method,Bitumen Production
0,2010-01-01,Baytex Energy Ltd. (¹),Cliffdale Pilot,Peace River Area 2,11034E,Commercial-CSS,0.00
1,2010-01-01,North Peace Energy Corp.,Red Earth,Peace River Area 2,11209A,Commercial-CSS,10.30
2,2010-01-01,Penn West Petroleum Ltd.,Seal,Peace River Area 2,11377A,Commercial-CSS,0.00
3,2010-01-01,Baytex Energy Ltd. (¹²),Harmon Valley Pilot,Peace River Area 2,11551,Commercial-CSS,0.00
4,2010-01-01,Shell Canada Limited,Peace River,Peace River Area 2,8143L,Commercial-CSS,1340.08
...,...,...,...,...,...,...,...
95,2010-04-01,Japan Canada Oil Sands Limited (¹),Hangingstone,Athabasca,8788I,Commercial-SAGD,783.28
96,2010-04-01,Suncor Energy Inc. (¹),Firebag,Athabasca,8870R,Commercial-SAGD,8768.90
97,2010-04-01,Total E&P Joslyn Ltd. (¹),Joslyn Creek,Athabasca,9272E,Commercial-SAGD,0.00
98,2010-04-01,ConocoPhillips Canada Resources Corp.,Surmont,Athabasca,9426F,Commercial-SAGD,3130.72


In [None]:
# Assumes a Google Colab runtime: `google.colab` is imported here.
from google.colab import sheets
# Pass final_df to Colab's InteractiveSheet and keep the returned handle.
# NOTE(review): the output saved with this cell is
# "MessageError: Error: credential propagation was unsuccessful" — presumably
# the Google account authorization was declined or failed; confirm and re-run
# after granting access.
sheet = sheets.InteractiveSheet(df=final_df)

MessageError: Error: credential propagation was unsuccessful