In [28]:
# List the local git branches; the entry marked with '*' is the branch currently checked out.
!git branch

  backend[m
* [32mfeature/model[m
  main[m


In [29]:
# Install the required libraries on first use (uncomment the line below to run it).
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
# %pip install pandas numpy scikit-learn requests beautifulsoup4 matplotlib seaborn

import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error


In [30]:
# URL of the Ethiopian Electric Utility tariff page
url = "http://www.ethiopianelectricutility.gov.et/electricity-tariff/detail/85?lang=en"

# Fetch the page defensively. A timeout stops the cell from hanging on an
# unresponsive server, and raise_for_status() keeps an HTTP error page from
# being parsed as if it were the tariff page. Because the DataFrame below is
# built from hard-coded values, a failed download only produces a warning.
response = None
soup = None
tables = []
try:
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")
    # Collect every <table> element on the page (kept for manual inspection).
    tables = soup.find_all("table")
except requests.RequestException as exc:
    print(f"Could not fetch tariff page ({exc}); continuing with the hard-coded tariff table.")

# NOTE: the scraped `tables` are not parsed yet; the tariff levels below were
# entered by hand as example data.
tariff_data = {
    "Level": [1, 2, 3, 4, 5, 6, 7],
    "kWh Range": ["0-50", "51-100", "101-200", "201-300", "301-400", "401-500", "500+"],
    "Price per kWh (Birr)": [0.273, 0.773, 1.473, 2.000, 2.327, 2.552, 2.900]
}

df_tariff = pd.DataFrame(tariff_data)
print(df_tariff)


   Level kWh Range  Price per kWh (Birr)
0      1      0-50                 0.273
1      2    51-100                 0.773
2      3   101-200                 1.473
3      4   201-300                 2.000
4      5   301-400                 2.327
5      6   401-500                 2.552
6      7      500+                 2.900


In [1]:
import pandas as pd
import numpy as np

# Define sample countries and the categorical values each record can take
countries = ["USA", "Germany", "France", "India", "China", "Brazil", "Ethiopia", "South Africa", "UK", "Canada"]
household_sizes = [1, 2, 3, 4, 5, 6]  # Number of people in household
seasonal_factors = {"Winter": 1.2, "Spring": 0.9, "Summer": 1.1, "Autumn": 1.0}  # Seasonal consumption variation
appliance_types = ["Heating", "Cooling", "Lighting", "Cooking", "Entertainment", "Others"]

# Define base tariffs per country (USD per kWh)
tariffs = {
    "USA": 0.15, "Germany": 0.30, "France": 0.25, "India": 0.08, "China": 0.10,
    "Brazil": 0.12, "Ethiopia": 0.06, "South Africa": 0.14, "UK": 0.22, "Canada": 0.13
}

# Seeded generator so that Restart & Run All regenerates exactly the same dataset.
SEED = 42
rng = np.random.default_rng(SEED)

# Generate synthetic data
num_samples = 10000
season_names = list(seasonal_factors)

# Draw every random quantity in one vectorized call per column; .tolist()
# converts the results to plain Python str/int/float values.
country_draws = rng.choice(countries, size=num_samples).tolist()
household_draws = rng.choice(household_sizes, size=num_samples).tolist()
season_draws = rng.choice(season_names, size=num_samples).tolist()
base_draws = rng.uniform(5, 30, size=num_samples).tolist()  # Base daily consumption in kWh

# Random per-appliance weights, normalized per row so the shares sum to 1 and
# the appliance breakdown is a true split of the daily consumption.
weight_draws = rng.uniform(0.1, 0.5, size=(num_samples, len(appliance_types)))
share_draws = (weight_draws / weight_draws.sum(axis=1, keepdims=True)).tolist()

data = []
for country, household_size, season, base_daily_consumption, shares in zip(
    country_draws, household_draws, season_draws, base_draws, share_draws
):
    tariff = tariffs[country]

    # Daily consumption is the random base scaled by the seasonal factor.
    # NOTE(review): household_size is recorded in the dataset but does not
    # influence the simulated consumption.
    daily_consumption = base_daily_consumption * seasonal_factors[season]
    monthly_consumption = daily_consumption * 30  # fixed 30-day month

    # Appliance-specific consumption: daily total split by the normalized shares
    appliance_usage = {
        appliance: share * daily_consumption
        for appliance, share in zip(appliance_types, shares)
    }

    # Total cost calculation
    monthly_cost = monthly_consumption * tariff

    # Append to dataset
    data.append([country, household_size, season, daily_consumption, monthly_consumption, tariff, monthly_cost, appliance_usage])

# Create DataFrame
global_energy_data = pd.DataFrame(data, columns=[
    "Country", "Household_Size", "Season", "Daily_Consumption_kWh",
    "Monthly_Consumption_kWh", "Tariff_per_kWh", "Monthly_Cost", "Appliance_Usage"
])

# Save the dataset for later use
global_energy_data.to_csv("simulated_global_energy_data.csv", index=False)

# Show sample of the dataset
print(global_energy_data.head())


        Country  Household_Size  Season  Daily_Consumption_kWh  \
0         India               3  Autumn              19.536241   
1       Germany               6  Autumn              29.834938   
2        Brazil               4  Winter              10.783258   
3        Brazil               1  Spring              26.430403   
4  South Africa               5  Autumn              24.625557   

   Monthly_Consumption_kWh  Tariff_per_kWh  Monthly_Cost  \
0               586.087227            0.08     46.886978   
1               895.048155            0.30    268.514446   
2               323.497739            0.12     38.819729   
3               792.912097            0.12     95.149452   
4               738.766697            0.14    103.427338   

                                     Appliance_Usage  
0  {'Heating': 4.973335574651555, 'Cooling': 3.88...  
1  {'Heating': 6.16148797271564, 'Cooling': 13.38...  
2  {'Heating': 1.127410079814348, 'Cooling': 5.33...  
3  {'Heating': 11.2141

In [None]:
import pandas as pd

# Load the dataset written by the data-generation cell
df = pd.read_csv("simulated_global_energy_data.csv")

# Display basic info. DataFrame.info() prints its report itself and returns
# None, so it must not be wrapped in print() (that would emit a stray "None").
df.info()
print(df.head())


In [None]:
from sklearn.preprocessing import StandardScaler

# Columns that get standardized to zero mean / unit variance.
numeric_features = ["Daily_Consumption_kWh", "Monthly_Consumption_kWh", "Tariff_per_kWh"]

# Build the model-ready frame in one chain:
#   1. discard the per-appliance breakdown column,
#   2. remove any rows that contain missing values,
#   3. one-hot encode Country and Season (first level of each dropped).
df = (
    df.drop(columns=["Appliance_Usage"])
    .dropna()
    .pipe(pd.get_dummies, columns=["Country", "Season"], drop_first=True)
)

# Standardize the numeric features.
# NOTE(review): the scaler is fit on the whole frame, before the train/test
# split performed in a later cell, so test rows contribute to the scaling
# statistics.
scaler = StandardScaler()
df[numeric_features] = scaler.fit_transform(df[numeric_features])

# Preview the preprocessed data
print(df.head())


In [4]:
from sklearn.model_selection import train_test_split

# The column the model learns to predict; every other column is a feature.
target_column = "Monthly_Cost"
y = df[target_column]
X = df.drop(columns=[target_column])

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(f"Training Samples: {len(X_train)}, Testing Samples: {len(X_test)}")


Training Samples: 8000, Testing Samples: 2000


In [5]:
from sklearn.ensemble import RandomForestRegressor

# Build a 100-tree random forest (fixed seed) and fit it on the training split.
# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

print("Model training complete!")


Model training complete!


In [6]:
from sklearn.metrics import mean_absolute_error, r2_score

# Score the fitted model on the held-out test rows.
y_pred = model.predict(X_test)

# Compare predictions with the true costs: R² first, then mean absolute error.
# NOTE(review): Monthly_Cost is generated as Monthly_Consumption_kWh * Tariff_per_kWh
# and both of those columns are among the features, so a near-perfect R² is
# expected here rather than evidence of a strong model.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print(
    f"Mean Absolute Error: {mae:.2f}",
    f"R² Score: {r2:.2f}",
    sep="\n",
)


Mean Absolute Error: 0.09
R² Score: 1.00


In [7]:
# List local branches; this only works when the working directory is inside a git repository.
!git branch

fatal: not a git repository (or any of the parent directories): .git


In [8]:
# Clone the hackathon repository into a new FTL-Hackathon-1 directory under the current working directory.
!git clone https://github.com/jae-red21/FTL-Hackathon-1

Cloning into 'FTL-Hackathon-1'...
remote: Enumerating objects: 66, done.[K
remote: Counting objects: 100% (66/66), done.[K
remote: Compressing objects: 100% (53/53), done.[K
remote: Total 66 (delta 23), reused 38 (delta 7), pack-reused 0 (from 0)[K
Receiving objects: 100% (66/66), 14.64 KiB | 7.32 MiB/s, done.
Resolving deltas: 100% (23/23), done.


In [11]:
# List every branch, local and remote-tracking (-a), to see what exists on origin.
!git branch -a

* [32mmain[m
  [31mremotes/origin/HEAD[m -> origin/main
  [31mremotes/origin/backend[m
  [31mremotes/origin/feature/base[m
  [31mremotes/origin/feature/dash-board[m
  [31mremotes/origin/feature/estimate[m
  [31mremotes/origin/feature/landing[m
  [31mremotes/origin/feature/pricing[m
  [31mremotes/origin/feature/tips[m
  [31mremotes/origin/flasksetup[m
  [31mremotes/origin/frontend[m
  [31mremotes/origin/main[m


In [1]:
!git feature-branch/model-training

git: 'feature-branch/model-training' is not a git command. See 'git --help'.
