In [20]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
df = pd.read_csv("Lab 3 Dataset.csv", skiprows=4)

print(df.head().to_markdown(index=False, numalign="left", stralign="left"))
print(df.info())

| Country Name                | Country Code   | Indicator Name    | Indicator Code   | 1960        | 1961        | 1962        | 1963        | 1964        | 1965        | 1966        | 1967        | 1968        | 1969        | 1970        | 1971        | 1972        | 1973        | 1974        | 1975        | 1976        | 1977        | 1978        | 1979        | 1980        | 1981        | 1982        | 1983        | 1984        | 1985        | 1986        | 1987        | 1988        | 1989        | 1990        | 1991        | 1992        | 1993        | 1994        | 1995        | 1996        | 1997        | 1998        | 1999        | 2000        | 2001        | 2002        | 2003        | 2004        | 2005        | 2006        | 2007        | 2008        | 2009        | 2010        | 2011        | 2012        | 2013        | 2014        | 2015        | 2016        | 2017        | 2018        | 2019        | 2020        | 2021        | 2022        | 2023   | Unnamed: 68   |
|:---

In [21]:
clean_df = df.drop(columns=['2023', 'Unnamed: 68'])
clean_df = clean_df[clean_df['Indicator Name'] == 'Population, total'].copy()

selected_country = 'Angola'
df_country = clean_df[clean_df['Country Name'] == selected_country].copy()

df_country_t = df_country.drop(columns=['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code']).T
df_country_t = df_country_t.iloc[1:].copy()
df_country_t = df_country_t.reset_index()
df_country_t.columns = ['Year', 'Population']
df_country_t['Year'] = pd.to_numeric(df_country_t['Year'])
df_country_t['Population'] = pd.to_numeric(df_country_t['Population'], errors='coerce')
df_country_t.dropna(inplace=True)

print(df_country_t.head())
print(df_country_t.info())

   Year  Population
0  1961   5441333.0
1  1962   5521400.0
2  1963   5599827.0
3  1964   5673199.0
4  1965   5736582.0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62 entries, 0 to 61
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Year        62 non-null     int64  
 1   Population  62 non-null     float64
dtypes: float64(1), int64(1)
memory usage: 1.1 KB
None


In [22]:
scaler = StandardScaler()
df_country_t[['Year', 'Population']] = scaler.fit_transform(df_country_t[['Year', 'Population']])

X = []
y = []
sequence_length = 4  

for i in range(len(df_country_t) - sequence_length):
    X.append(df_country_t['Population'][i:i+sequence_length])
    y.append(df_country_t['Population'][i+sequence_length])

X, y = np.array(X), np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)  # Don't shuffle for time series

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(sequence_length,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=100, validation_split=0.4)

y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print('MAE:', mae)
print('RMSE:', rmse)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7s/step - loss: 0.1634 - val_loss: 0.2865
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 353ms/step - loss: 0.1411 - val_loss: 0.2876
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 351ms/step - loss: 0.1206 - val_loss: 0.2888
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 322ms/step - loss: 0.1017 - val_loss: 0.2901
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 332ms/step - loss: 0.0846 - val_loss: 0.2915
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 480ms/step - loss: 0.0693 - val_loss: 0.2930
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 404ms/step - loss: 0.0558 - val_loss: 0.2946
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 348ms/step - loss: 0.0440 - val_loss: 0.2963
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[