In [None]:
pip install pandas

In [None]:
import pandas as pd

# Load dataset. latin1 is requested explicitly because decoding this file as
# UTF-8 sometimes fails.
df = pd.read_csv("Global_Superstore2.csv", encoding='latin1')  # sometimes UTF-8 fails

# Quick check: first rows, column dtypes / non-null counts, numeric summary.
print(df.head())
# info() writes its report itself and returns None, so it must not be wrapped
# in print() (that would emit a stray "None" after the report).
df.info()
print(df.describe())


In [None]:
# Parse both date columns as datetimes; dayfirst=True makes ambiguous values
# such as 01-02-2013 resolve to day-month-year.
for date_col in ('Order Date', 'Ship Date'):
    df[date_col] = pd.to_datetime(df[date_col], dayfirst=True)

In [None]:
# Count missing values per column.
missing_per_column = df.isna().sum()
print(missing_per_column)


In [None]:
# Drop the 'Postal Code' column (presumably because it is mostly missing in the
# null counts printed earlier - confirm). errors='ignore' keeps this cell
# idempotent: re-running it after the column is already gone no longer raises
# KeyError.
df = df.drop(columns=['Postal Code'], errors='ignore')

In [None]:
# Headline KPIs over the whole dataset.
total_sales = df['Sales'].sum()
total_profit = df['Profit'].sum()

# Average order value = mean of per-order sales totals. Averaging the raw
# 'Sales' column instead gives the mean per row, which understates the value
# whenever one order spans several rows.
# NOTE(review): assumes 'Order ID' identifies an order and that an order can
# span multiple rows - confirm against the CSV schema. If the column is absent
# the previous per-row mean is used as a fallback.
if 'Order ID' in df.columns:
    avg_order_value = df.groupby('Order ID')['Sales'].sum().mean()
else:
    avg_order_value = df['Sales'].mean()

print("Total Sales:", total_sales)
print("Total Profit:", total_profit)
print("Average Order Value:", avg_order_value)


In [None]:
pip install matplotlib


In [None]:
import matplotlib.pyplot as plt

# Sum sales per calendar month of the order date and draw the trend line.
order_month = df['Order Date'].dt.to_period('M')
monthly_sales = df.groupby(order_month)['Sales'].sum()

trend_ax = monthly_sales.plot(kind='line', figsize=(12,6), title='Monthly Sales Trend')
trend_ax.set_ylabel('Sales')
plt.show()

In [None]:
# Ten products with the highest total sales, shown as a bar chart.
product_sales = df.groupby('Product Name')['Sales'].sum()
top_products = product_sales.sort_values(ascending=False).head(10)
products_ax = top_products.plot.bar(figsize=(12,6), title='Top 10 Products by Sales')
products_ax.set_ylabel('Sales')
plt.show()

# Share of total sales contributed by each category, shown as a pie chart.
category_sales = df.groupby('Category')['Sales'].sum()
category_sales.plot.pie(autopct='%1.1f%%', figsize=(7,7), title='Sales by Category')
plt.show()


In [None]:
pip install seaborn

In [None]:
import seaborn as sns

# Total profit for every (Category, Region) pair: categories as rows,
# regions as columns.
region_category_profit = df.pivot_table(
    index='Category',
    columns='Region',
    values='Profit',
    aggfunc='sum',
)

# Annotated heatmap; fmt=".0f" prints each cell without decimals.
heatmap_ax = sns.heatmap(region_category_profit, annot=True, fmt=".0f", cmap="YlGnBu")
heatmap_ax.set_title("Profit by Category and Region")
plt.show()


In [None]:
# Monthly sales totals, re-indexed from monthly periods to timestamps so the
# series can be handed to the forecasting libraries below.
monthly_sales = (
    df.groupby(df['Order Date'].dt.to_period('M'))['Sales']
    .sum()
    .to_timestamp()  # PeriodIndex -> DatetimeIndex
)

monthly_sales.head()


In [None]:
pip install prophet


In [None]:
from prophet import Prophet

# Prepare data for Prophet: a two-column frame built from the monthly series.
prophet_df = monthly_sales.reset_index()
prophet_df.columns = ['ds', 'y']  # Prophet requires ds (date) and y (value)

# Initialize & fit on the full monthly history.
model = Prophet()
model.fit(prophet_df)

# Future dataframe (history + next 12 months).
# freq='MS' (month start) is used because monthly_sales was converted with
# to_timestamp(), which stamps each month at its first day. The previous
# freq='M' produced month-END dates, so the forecast horizon was out of step
# with the history it was fitted on.
future = model.make_future_dataframe(periods=12, freq='MS')
forecast = model.predict(future)

# Plot forecast
fig = model.plot(forecast)


In [None]:
pip install statsmodels


In [None]:
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt

# Fit a simple ARIMA(p, d, q) = (1, 1, 1) on the monthly sales series.
arima_order = (1, 1, 1)
model_arima = ARIMA(monthly_sales, order=arima_order)
arima_fit = model_arima.fit()

# Project the next 12 months beyond the end of the series.
forecast_arima = arima_fit.forecast(steps=12)

# Draw the history and the forecast on a single set of axes.
arima_fig, arima_ax = plt.subplots(figsize=(12,6))
arima_ax.plot(monthly_sales, label='Historical Sales')
arima_ax.plot(forecast_arima.index, forecast_arima, label='ARIMA Forecast', color='red')
arima_ax.legend()
arima_ax.set_title("ARIMA Forecast")
plt.show()


In [None]:
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
import numpy as np

# Out-of-sample evaluation on a 12-month holdout.
#
# The models fitted in the earlier cells were trained on the FULL series, and
# their 12-step outputs are predictions for months AFTER the data ends.
# Scoring those against the last 12 observed months compares different time
# periods, so both models are refitted here on everything except the final
# HORIZON months and scored on that held-out window.
HORIZON = 12
assert len(monthly_sales) > HORIZON, "need more than HORIZON months to hold out a test window"

train = monthly_sales.iloc[:-HORIZON]
actual = monthly_sales.iloc[-HORIZON:]

# Prophet: fit on the training months, then predict exactly the held-out dates
# (taken from actual.index so predictions and actuals are aligned by design).
prophet_train = train.reset_index()
prophet_train.columns = ['ds', 'y']
prophet_eval = Prophet()
prophet_eval.fit(prophet_train)
holdout_dates = pd.DataFrame({'ds': actual.index})
prophet_pred = prophet_eval.predict(holdout_dates).set_index('ds')['yhat']

mape_prophet = mean_absolute_percentage_error(actual, prophet_pred)
rmse_prophet = np.sqrt(mean_squared_error(actual, prophet_pred))

print("Prophet - MAPE:", mape_prophet, "RMSE:", rmse_prophet)

# ARIMA: same order as the full-series model, fitted on the training months
# and forecast HORIZON steps ahead, i.e. over the held-out window.
arima_pred = ARIMA(train, order=(1,1,1)).fit().forecast(steps=HORIZON)
mape_arima = mean_absolute_percentage_error(actual, arima_pred)
rmse_arima = np.sqrt(mean_squared_error(actual, arima_pred))

print("ARIMA - MAPE:", mape_arima, "RMSE:", rmse_arima)


In [None]:
pip install scikit-learn
