In [6]:
# Import necessary libraries
from google.colab import files  # For file upload in Google Colab
import pandas as pd  # For data manipulation and analysis
from sklearn.model_selection import train_test_split  # For splitting data into training and testing sets
import xgboost as xgb  # For using the XGBoost algorithm
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score  # For evaluating model performance
import numpy as np  # For numerical operations

# Step 1: Upload the dataset
# files.upload() opens the Colab file picker; its return value maps each uploaded
# file's name to that file's raw bytes.
uploaded = files.upload()

# Step 2: Load the uploaded file into a Pandas DataFrame
# Parse the bytes returned by the upload itself instead of re-opening a fixed path.
# When a file with the same name already exists in the working directory, Colab
# stores the new upload under a suffixed name (e.g. "... (4).xlsx"), so reading the
# hard-coded name would silently load a stale copy from an earlier upload.
from io import BytesIO  # local import keeps this cell runnable on its own

if uploaded:
    file_name = next(iter(uploaded))  # name of the first file chosen in the dialog
    workbook_source = BytesIO(uploaded[file_name])
else:
    # Nothing was uploaded (e.g. the dialog was dismissed): fall back to a copy that
    # may already be present in the working directory, as the cell did before.
    file_name = 'Germany Finantial Data1.xlsx'
    workbook_source = file_name
sheet1_df = pd.read_excel(workbook_source, sheet_name='Sheet1')  # Load Sheet1 into a DataFrame

# Step 3: Convert the 'date' column to integers representing years
# astype(int) raises if the column contains missing or non-numeric values.
sheet1_df['date'] = sheet1_df['date'].astype(int)

# Step 4: Verify the date range in the DataFrame
print("Date range in the DataFrame:")
print(sheet1_df['date'].min(), sheet1_df['date'].max())  # Earliest and latest year in the dataset

# Step 5: Chronological split -- rows dated 2019 or earlier train the model,
# rows dated 2020 or later are held out for testing.
train_mask = sheet1_df['date'] <= 2019
test_mask = sheet1_df['date'] >= 2020
train_df = sheet1_df[train_mask]
test_df = sheet1_df[test_mask]

# Step 6: Show which years ended up in each partition
print("Training data years:", train_df['date'].unique(), sep="\n")
print("Testing data years:", test_df['date'].unique(), sep="\n")

# Steps 7-8: Separate the target from the features for both partitions.
# Features are every column except the target and the 'date' column.
y_train = train_df['GDP Growth (%)']
y_test = test_df['GDP Growth (%)']
X_train = train_df.drop(columns=['GDP Growth (%)', 'date'])
X_test = test_df.drop(columns=['GDP Growth (%)', 'date'])

# Step 9: Configure the XGBoost regressor and fit it on the training partition
xgb_params = {
    'objective': 'reg:squarederror',
    'n_estimators': 100,
    'random_state': 42,  # fixed seed so repeated runs give the same model
}
xgb_model = xgb.XGBRegressor(**xgb_params)
xgb_model.fit(X_train, y_train)

# Step 10: Predict GDP growth for the held-out test rows
y_pred = xgb_model.predict(X_test)

# Step 11: Score the predictions against the actual test values
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the mean squared error
r2 = r2_score(y_test, y_pred)

# Step 12: Report each metric on its own line
for metric_label, metric_value in (
    ("Mean Absolute Error (MAE)", mae),
    ("Root Mean Squared Error (RMSE)", rmse),
    ("R-squared (R²)", r2),
):
    print(f"{metric_label}: {metric_value}")

# Step 13: Years that belong to the test partition
test_years = test_df['date']

# Step 14: Put actual and predicted GDP growth side by side, ordered by year
comparison_columns = {
    'Year': test_years,
    'Actual': y_test,
    'Predicted': y_pred,
}
results_df = pd.DataFrame(comparison_columns)
results_df = results_df.sort_values('Year')

print("\nActual vs. Predicted GDP Growth:", results_df, sep="\n")

# Step 15: Build the single input row used for the 2025 estimate
future_year = 2025

# The input row is the per-feature mean over the training partition, so the number
# printed below is the model's response to an "average training year", not to
# feature values observed or projected for 2025.
training_feature_means = X_train.mean()
future_data = pd.DataFrame(
    training_feature_means.to_numpy().reshape(1, -1),
    columns=X_train.columns,
)

# Step 16: Run the trained model on that row and keep the single scalar result
future_predictions = xgb_model.predict(future_data)
predicted_gdp_2025 = future_predictions[0]

# Step 17: Display the prediction for 2025
print(f"Predicted GDP Growth for {future_year}: {predicted_gdp_2025:.2f}%")

Saving Germany Finantial Data1.xlsx to Germany Finantial Data1 (4).xlsx
Date range in the DataFrame:
1990 2024
Training data years:
[1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003
 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017
 2018 2019]
Testing data years:
[2020 2021 2022 2023 2024]
Mean Absolute Error (MAE): 0.709502475120946
Root Mean Squared Error (RMSE): 0.872944744063455
R-squared (R²): 0.8816001787067614

Actual vs. Predicted GDP Growth:
    Year    Actual  Predicted
30  2020 -4.095137  -5.543857
31  2021  3.670000   3.243220
32  2022  1.369731   1.182358
33  2023 -0.266438  -0.566013
34  2024 -0.200000   0.985066
Predicted GDP Growth for 2025: 1.64%


In [7]:
# Import necessary libraries
from google.colab import files  # For file upload in Google Colab
import pandas as pd  # For data manipulation and analysis
import xgboost as xgb  # For using the XGBoost algorithm
import numpy as np  # For numerical operations

# Step 1: Upload the dataset
# files.upload() opens the Colab file picker; its return value maps each uploaded
# file's name to that file's raw bytes.
uploaded = files.upload()

# Step 2: Load the uploaded file into a Pandas DataFrame
# Parse the bytes returned by the upload itself instead of re-opening a fixed path.
# When a file with the same name already exists in the working directory, Colab
# stores the new upload under a suffixed name (e.g. "... (5).xlsx"), so reading the
# hard-coded name would silently load a stale copy from an earlier upload.
from io import BytesIO  # local import keeps this cell runnable on its own

if uploaded:
    file_name = next(iter(uploaded))  # name of the first file chosen in the dialog
    workbook_source = BytesIO(uploaded[file_name])
else:
    # Nothing was uploaded (e.g. the dialog was dismissed): fall back to a copy that
    # may already be present in the working directory, as the cell did before.
    file_name = 'Germany Finantial Data1.xlsx'
    workbook_source = file_name
sheet1_df = pd.read_excel(workbook_source, sheet_name='Sheet1')  # Load Sheet1 into a DataFrame

# Step 3: Convert the 'date' column to integers representing years
# astype(int) raises if the column contains missing or non-numeric values.
sheet1_df['date'] = sheet1_df['date'].astype(int)

# Step 4: Verify the date range in the DataFrame
print("Date range in the DataFrame:")
print(sheet1_df['date'].min(), sheet1_df['date'].max())  # Earliest and latest year in the dataset

# Step 5: Separate the target from the features using every available row.
# Features are every column except the target and the 'date' column.
y = sheet1_df['GDP Growth (%)']
X = sheet1_df.drop(columns=['GDP Growth (%)', 'date'])

# Step 6: Fit the XGBoost regressor on the full dataset (no hold-out rows here)
xgb_params = {
    'objective': 'reg:squarederror',
    'n_estimators': 100,
    'random_state': 42,  # fixed seed so repeated runs give the same model
}
xgb_model = xgb.XGBRegressor(**xgb_params)
xgb_model.fit(X, y)

# Step 7: Year the estimate is reported for
future_year = 2025

# Step 8: Build the single input row from the per-feature mean over all rows.
# The printed figure is therefore the model's response to an "average historical
# year", not to feature values observed or projected for 2025.
historical_feature_means = X.mean()
future_data = pd.DataFrame(
    historical_feature_means.to_numpy().reshape(1, -1),
    columns=X.columns,
)

# Step 9: Run the trained model on that row (returns a one-element array)
predicted_gdp = xgb_model.predict(future_data)

# Step 10: Display the prediction
print(f"Predicted GDP Growth for {future_year}: {predicted_gdp[0]:.2f}%")

Saving Germany Finantial Data1.xlsx to Germany Finantial Data1 (5).xlsx
Date range in the DataFrame:
1990 2024
Predicted GDP Growth for 2025: 1.44%
