In [14]:
import pandas as pd
import numpy as np
import json
import os
from datetime import datetime

In [15]:
os.makedirs("../dashboard/dashboard_data", exist_ok=True)

print("🚀 Starting data processing to generate dashboard JSON files...\n")

🚀 Starting data processing to generate dashboard JSON files...



In [16]:
with open("../data/raw/BrentOilPrices.csv", "r") as f:
    content = f.read().strip()

# Split by comma, then group every two values: (Date, Price)
parts = [p.strip() for p in content.split(",")]

# Handle mixed format: '20-May-87' and '"Jul 04, 2022"'
dates = []
prices = []

i = 0
while i < len(parts):
    date_str = parts[i].replace('"', '').strip()
    if i + 1 >= len(parts):
        break
    try:
        price = float(parts[i + 1])
        dates.append(date_str)
        prices.append(price)
        i += 2
    except ValueError:
        # Skip invalid price
        i += 1

In [17]:
# Create DataFrame
df = pd.DataFrame({"Date": dates, "Price": prices})

# Parse dates: two formats
def parse_date(date_str):
    date_str = date_str.strip()
    try:
        return datetime.strptime(date_str, "%d-%b-%y")  # e.g., 20-May-87
    except:
        try:
            return datetime.strptime(date_str, "%b %d, %Y")  # e.g., Jul 04, 2022
        except:
            return None

df["Date"] = df["Date"].apply(parse_date)
df = df.dropna().reset_index(drop=True)
df = df.sort_values("Date").reset_index(drop=True)

print(f"✅ Cleaned data: {len(df)} rows | Range: {df['Date'].min()} to {df['Date'].max()}")

✅ Cleaned data: 0 rows | Range: nan to nan


In [18]:
prices_json = [
    {"Date": row["Date"].strftime("%Y-%m-%d"), "Price": round(row["Price"], 2)}
    for _, row in df.iterrows()
]

with open("../dashboard/dashboard_data/prices.json", "w") as f:
    json.dump(prices_json, f, indent=2)

print("✅ Saved: dashboard_data/prices.json")

✅ Saved: dashboard_data/prices.json


In [19]:
events_data = [
    {"Date": "1990-08-02", "Event_Description": "Iraq invades Kuwait", "Event_Type": "Conflict"},
    {"Date": "1991-01-17", "Event_Description": "Gulf War begins", "Event_Type": "Conflict"},
    {"Date": "2001-09-11", "Event_Description": "9/11 Attacks", "Event_Type": "Geopolitical"},
    {"Date": "2003-03-20", "Event_Description": "US-led invasion of Iraq", "Event_Type": "Conflict"},
    {"Date": "2008-09-15", "Event_Description": "Lehman Brothers collapse", "Event_Type": "Economic_Crisis"},
    {"Date": "2011-02-15", "Event_Description": "Arab Spring begins", "Event_Type": "Political_Unrest"},
    {"Date": "2014-11-27", "Event_Description": "OPEC refuses to cut production", "Event_Type": "OPEC_Policy"},
    {"Date": "2016-11-30", "Event_Description": "OPEC agrees to cut output", "Event_Type": "OPEC_Policy"},
    {"Date": "2018-05-08", "Event_Description": "US exits Iran Nuclear Deal", "Event_Type": "Sanction"},
    {"Date": "2020-03-08", "Event_Description": "Russia-Saudi price war", "Event_Type": "Conflict/OPEC"},
    {"Date": "2020-04-20", "Event_Description": "WTI crude futures crash to -$37", "Event_Type": "Market_Anomaly"},
    {"Date": "2022-02-24", "Event_Description": "Russia invades Ukraine", "Event_Type": "Conflict"},
    {"Date": "2022-03-08", "Event_Description": "US bans Russian oil imports", "Event_Type": "Sanction"},
    {"Date": "2022-06-03", "Event_Description": "EU partial Russian oil embargo", "Event_Type": "Sanction"},
    {"Date": "2020-01-30", "Event_Description": "WHO declares global health emergency (Covid-19)", "Event_Type": "Pandemic"}
]

# Convert event dates to datetime for consistency
for e in events_data:
    e["Date"] = pd.to_datetime(e["Date"]).strftime("%Y-%m-%d")

with open("../dashboard/dashboard_data/events.json", "w") as f:
    json.dump(events_data, f, indent=2)

print("✅ Saved: dashboard_data/events.json")

✅ Saved: dashboard_data/events.json
