In [1]:
import pandas as pd

# Load dataset
file_path = "carbon_emissions_small.csv"
df = pd.read_csv(file_path)

# Display first few rows
print(df.head())

# Check dataset info
print(df.info())

# Check for missing values
print(df.isnull().sum())

   Year  Country  Energy_Consumption  Industrial_Production  Transportation  \
0  2029      USA                5000                    120             195   
1  2030    India                4000                    141             250   
2  2020      USA                4000                    110             150   
3  2027  Germany                3200                    128             135   
4  2020   Brazil                1500                    105              80   

   Weather_Patterns  Carbon_Emissions  
0                96              6100  
1                98              7500  
2               105              5200  
3               100              4200  
4               102              2200  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 66 entries, 0 to 65
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Year                   66 non-null     int64 
 1   Country                66

In [2]:
# Drop rows with missing values
df = df.dropna()

# Convert categorical variables to numeric using one-hot encoding
df = pd.get_dummies(df, drop_first=True)

# Normalize numeric features
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
df[df.columns] = scaler.fit_transform(df[df.columns])

In [5]:
from sklearn.model_selection import train_test_split

# Define target (assume 'Emissions' is the target variable)
X = df.drop(columns=['Carbon_Emissions'])
y = df['Carbon_Emissions']

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
print(df.columns)


Index(['Year', 'Energy_Consumption', 'Industrial_Production', 'Transportation',
       'Weather_Patterns', 'Carbon_Emissions', 'Country_China',
       'Country_Germany', 'Country_India', 'Country_Japan', 'Country_USA'],
      dtype='object')


In [6]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 0.00016581548793087337


In [7]:
import pickle
pickle.dump(model, open('carbon_model.pkl', 'wb'))