In [1]:
import pandas as pd
import plotly.graph_objs as go
from statsmodels.tsa.stattools import adfuller

In [3]:
# Load the data
data = pd.read_csv('Commodity Prices Monthly.csv')

# Convert the data to a time series format
data = data.melt(id_vars=['Year'], var_name='Month', value_name='Price')
data['Date'] = pd.to_datetime(data['Year'].astype(str) + '-' + data['Month'], format='%Y-%b', errors='coerce')
data = data.drop(['Year', 'Month'], axis=1)
data = data.set_index('Date')
data = data.sort_index()

In [4]:
# Task 1: Perform EDA
# Plot the time series
fig = go.Figure()
fig.add_trace(go.Scatter(x=data.index, y=data['Price'], mode='lines'))
fig.update_layout(title='Brent Crude Oil Prices', xaxis_title='Date', yaxis_title='Price (USD/Barrel)')
fig.show()


In [15]:
# Check for missing or invalid values
print("Number of missing values in 'Price':", data['Price'].isnull().sum())
print("Unique values in 'Price':", data['Price'].unique())


Number of missing values in 'Price': 0
Unique values in 'Price': [24.57 27.25 23.28 27.39 29.62 28.76 30.15 32.48 31.58 32.24 26.67 19.96
 20.19 24.03 26.03 25.69 24.51 25.67 26.26 28.32 27.51 24.5 27.45 30.18
 32.36 29.45 24.79 25.37 27.16 28.21 29.4 26.78 28.97 28.83 29.58 30.56
 30.34 32.73 30.0 37.13 35.52 37.67 41.73 42.82 49.38 44.55 40.64 44.88
 45.85 53.28 53.22 49.85 55.6 57.93 63.84 63.72 59.44 56.21 57.61 63.86
 61.1 63.06 70.56 70.97 69.74 74.24 73.87 63.49 60.13 60.0 62.54 54.56
 58.96 62.36 67.49 67.92 70.55 75.84 71.17 77.0 82.47 92.06 91.51 91.92
 94.49 102.98 110.43 124.61 133.47 134.79 115.22 100.75 73.6 55.05 43.29
 45.62 43.73 47.32 51.23 58.57 69.34 65.76 73.07 68.19 77.5 75.24 76.92
 74.75 79.9 85.68 76.99 75.66 75.49 77.11 78.21 83.49 86.11 92.34 96.82
 104.09 114.62 123.13 114.53 113.91 116.68 109.82 109.96 108.8 110.61
 107.72 111.63 119.15 124.62 120.37 109.36 95.89 102.77 113.19 113.04
 111.52 109.53 109.19 112.28 116.11 103.31 103.32 103.3 107.37 110.25
 111

In [16]:
# Remove the missing value by excluding the last row
data = data[:-1]


In [18]:
data.head()

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2000-02-01,24.57
2000-03-01,27.25
2000-04-01,23.28
2000-05-01,27.39
2000-06-01,29.62


In [17]:
# Calculate summary statistics
print(f"Mean: {data['Price'].mean():.2f}")
print(f"Median: {data['Price'].median():.2f}")
print(f"Standard Deviation: {data['Price'].std():.2f}")


TypeError: unsupported operand type(s) for +: 'float' and 'str'

In [None]:


# Task 2: Check for stationarity
result = adfuller(data['Price'])
print(f"ADF Statistic: {result[0]:.2f}")
print(f"p-value: {result[1]:.2f}")

# If the p-value is less than the significance level (e.g., 0.05), the series is stationary
if result[1] > 0.05:
    print("The series is non-stationary. Applying first-order differencing.")
    data['Price_diff'] = data['Price'].diff()
    data = data.dropna()
    
    # Check stationarity of the differenced series
    result = adfuller(data['Price_diff'])
    print(f"ADF Statistic (Differenced): {result[0]:.2f}")
    print(f"p-value (Differenced): {result[1]:.2f}")
else:
    print("The series is stationary.")