In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder


In [None]:
# Read the Walmart sales workbook into the working DataFrame.
workbook_path = '/content/Walmart Sales.xlsx'
df = pd.read_excel(workbook_path)

In [None]:
# Discard every row that has a missing value in any column; rebinding `df`
# (instead of mutating in place) keeps this cell safe to re-run.
df = df.dropna()

In [None]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

Unnamed: 0,Invoice ID,Branch,City,Customer type,Gender,Product line,Unit price,Quantity,Date,Time,Payment,Rating
0,750-67-8428,A,Yangon,Member,Female,Health and beauty,74.69,7,1/5/2019,13:08:00,Ewallet,9.1
1,226-31-3081,A,Naypyitaw,Normal,Female,Electronic accessories,15.28,5,3/8/2019,10:29:00,Cash,9.6
2,631-41-3108,A,Yangon,Normal,Male,Home and lifestyle,46.33,7,3/3/2019,13:23:00,Credit card,7.4
3,123-19-1176,B,Yangon,Member,Male,Health and beauty,58.22,8,1/27/2019,20:33:00,Ewallet,8.4
4,373-73-7910,C,Yangon,Normal,Male,Sports and travel,86.31,7,2/8/2019,10:37:00,Ewallet,5.3


In [None]:
# Columns holding string categories that must become integers before modelling.
categorical_cols = ['Branch', 'City', 'Customer type', 'Gender', 'Product line', 'Payment']

# One fitted encoder per column, kept so the integer codes can be mapped back
# to their original labels later (via `inverse_transform`).
# NOTE(review): scikit-learn documents LabelEncoder as an encoder for target
# values; OrdinalEncoder / OneHotEncoder are the feature-side equivalents.
label_encoders = {col: LabelEncoder() for col in categorical_cols}
for col, encoder in label_encoders.items():
    # Overwrites the column in `df` with its integer codes.
    df[col] = encoder.fit_transform(df[col])


In [None]:
# Predictors shared by both models.
# 'Quantity' is deliberately NOT a feature: it is the sales target itself and a
# direct factor of revenue, so including it leaks the answer into the inputs
# (the sales model then reports zero error and an R-squared of exactly 1.0,
# which says nothing about real predictive power).
feature_cols = ['Branch', 'City', 'Customer type', 'Gender', 'Product line', 'Unit price', 'Payment']
X = df[feature_cols]

# Targets: quantity per row, and revenue computed as unit price x quantity.
y_sales = df['Quantity']
y_revenue = df['Unit price'] * df['Quantity']  # Calculate revenue


In [None]:
# Split the features and both targets in a single call so all six pieces share
# the same row partition: 20% of rows are held out, with a fixed seed.
(
    X_train, X_test,
    y_sales_train, y_sales_test,
    y_revenue_train, y_revenue_test,
) = train_test_split(
    X, y_sales, y_revenue,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Random forest (100 trees, fixed seed) fitted to the quantity target.
rf_sales = RandomForestRegressor(random_state=42, n_estimators=100)
rf_sales.fit(X=X_train, y=y_sales_train)


In [None]:
# Random forest (100 trees, fixed seed) fitted to the revenue target.
rf_revenue = RandomForestRegressor(random_state=42, n_estimators=100)
rf_revenue.fit(X=X_train, y=y_revenue_train)


In [None]:
# Score the held-out rows with each fitted forest.
sales_predictions, revenue_predictions = (
    rf_sales.predict(X_test),
    rf_revenue.predict(X_test),
)

In [None]:
# MAE, MSE and R-squared for the quantity model on the held-out rows.
mae_sales, mse_sales, r2_sales = (
    metric(y_sales_test, sales_predictions)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

In [None]:
# MAE, MSE and R-squared for the revenue model on the held-out rows.
mae_revenue, mse_revenue, r2_revenue = (
    metric(y_revenue_test, revenue_predictions)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)


In [None]:
# Report the quantity model's held-out metrics, one per line.
print("Sales prediction evaluation:")
for label, value in (
    ("Mean Absolute Error:", mae_sales),
    ("Mean Squared Error:", mse_sales),
    ("R-squared:", r2_sales),
):
    print(label, value)


Sales prediction evaluation:
Mean Absolute Error: 0.0
Mean Squared Error: 0.0
R-squared: 1.0


In [None]:
# Report the revenue model's held-out metrics, one per line.
print("\nRevenue prediction evaluation:")
for label, value in (
    ("Mean Absolute Error:", mae_revenue),
    ("Mean Squared Error:", mse_revenue),
    ("R-squared:", r2_revenue),
):
    print(label, value)


Revenue prediction evaluation:
Mean Absolute Error: 5.197942499999982
Mean Squared Error: 61.697158954750336
R-squared: 0.998954470386578


In [None]:
import pandas as pd

# Reload from disk and drop incomplete rows in one chained step. A fresh read
# is needed because the label-encoding loop earlier in the notebook replaced
# the Branch/City strings in `df` with integer codes.
df = pd.read_excel('/content/Walmart Sales.xlsx').dropna()

# Mean unit price for every (Branch, City) pair.
unit_price_by_location = df.groupby(['Branch', 'City'])['Unit price']
average_price_by_branch_city = unit_price_by_location.mean()

print("Average price of an item sold at each branch of the city:")
print(average_price_by_branch_city)


Average price of an item sold at each branch of the city:
Branch  City     
A       Mandalay     53.353866
        Naypyitaw    54.123182
        Yangon       55.639298
B       Mandalay     56.133305
        Naypyitaw    57.785688
        Yangon       56.011062
C       Mandalay     57.958316
        Naypyitaw    57.941009
        Yangon       52.684602
Name: Unit price, dtype: float64
