In [1]:
# Import necessary libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os, sys
# Make the project's 'scripts' directory importable. The membership check keeps
# this cell idempotent: re-running it does not pile up duplicate sys.path entries.
SCRIPTS_DIR = os.path.abspath(os.path.join('..', 'scripts'))
if SCRIPTS_DIR not in sys.path:
    sys.path.append(SCRIPTS_DIR)

# Let pandas display up to 200 columns and 200 rows before truncating output
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 200)



In [2]:
# Project-local helpers (presumably resolved through the 'scripts' directory
# that was appended to sys.path earlier in the notebook)
from logging_setup import SetupLogger
from data_preprocessor import DataPreprocessor

# Create the logger shared by the rest of the notebook; the setup helper is
# given '../logs/notebooks.log' as its log file.
logger = (
    SetupLogger(log_file='../logs/notebooks.log')
    .get_logger()
)

# Set the default size (width, height in inches) for figures created later in
# the notebook. plt.figure(figsize=...) would only open one empty figure here
# and would not change the size of any subsequent plot.
plt.rcParams['figure.figsize'] = (12, 5)

<Figure size 1200x500 with 0 Axes>

<Figure size 1200x500 with 0 Axes>

In [4]:
# Shareable Google Drive link to the dataset
data = 'https://drive.google.com/file/d/1Vj0JN3-cICvpks3nsEUtmtsY8eSWYW8M/view?usp=sharing'

# Give the preprocessor the link and the shared notebook logger
processor = DataPreprocessor(
    data,
    logger=logger,
)

# Fetch the dataset through the preprocessor and keep the result for analysis
price_data = processor.load_data()

Downloading...
From: https://drive.google.com/uc?export=download&id=1Vj0JN3-cICvpks3nsEUtmtsY8eSWYW8M
To: /home/abel/CODE/KiFiya/10-Academy-Week-10/data/data.csv
100%|██████████| 156k/156k [00:00<00:00, 384kB/s]
  self.data['Date'] = pd.to_datetime(self.data['Date'].str.strip(),  errors='coerce')


In [5]:
# Preview the first 10 rows of the loaded data
price_data.head(n=10)

Unnamed: 0,Date,Price
0,1987-05-20,18.63
1,1987-05-21,18.45
2,1987-05-22,18.55
3,1987-05-25,18.6
4,1987-05-26,18.63
5,1987-05-27,18.6
6,1987-05-28,18.6
7,1987-05-29,18.58
8,1987-06-01,18.65
9,1987-06-02,18.68


In [7]:
# Named events and their date windows. Each entry maps an event name to the
# first ("start") and last ("end") day of its window, written as YYYY-MM-DD.
events_dict = {
    "Gulf War (1990-1991)": {"start": "1990-08-02", "end": "1991-02-28"},
    "September 11 Attacks (2001)": {"start": "2001-09-11", "end": "2001-09-11"},
    "2003 Invasion of Iraq": {"start": "2003-03-20", "end": "2011-12-18"},
    "2008 Financial Crisis": {"start": "2008-09-15", "end": "2009-07-01"},
    "Arab Spring (2010-2012)": {"start": "2010-12-18", "end": "2012-12-18"},
    "Libyan Civil War (2011)": {"start": "2011-02-15", "end": "2011-10-23"},
    "U.S. Shale Oil Boom (2010s)": {"start": "2010-01-01", "end": "2019-12-31"},
    "OPEC Production Cuts (2016-2017)": {"start": "2016-11-30", "end": "2017-06-30"},
    "Iran Nuclear Deal and Sanctions (2015-2018)": {"start": "2015-07-14", "end": "2018-05-07"},
    "COVID-19 Pandemic (2020)": {"start": "2020-03-11", "end": "2022-09-30"},
    "OPEC+ Response to COVID-19 (2020)": {"start": "2020-04-09", "end": "2020-12-31"},
    "Russia-Ukraine Conflict (2022)": {"start": "2022-02-24", "end": "2022-09-30"},
    "Global Economic Recovery (2021-2022)": {"start": "2021-01-01", "end": "2022-09-30"}
}

# Build the events table one row per event. The "start"/"end" values are read
# by key rather than by column position, so an entry whose keys are written in
# a different order cannot end up with swapped Start/End labels, and an entry
# missing a key fails loudly with a KeyError.
events_data = pd.DataFrame(
    [(name, window["start"], window["end"]) for name, window in events_dict.items()],
    columns=['Event', 'Start', 'End'],
)

# Convert Start and End to datetime. The explicit format avoids format
# inference and rejects any date that is not written as YYYY-MM-DD.
events_data = events_data.assign(
    Start=pd.to_datetime(events_data['Start'], format='%Y-%m-%d'),
    End=pd.to_datetime(events_data['End'], format='%Y-%m-%d'),
)

# Sanity check: every event window must end on or after the day it starts
assert (events_data['Start'] <= events_data['End']).all(), \
    "Found an event whose End date precedes its Start date"