In [None]:
import pandas as pd
# Read the GlobalLandTemperaturesByCity CSV into the working DataFrame.
csv_path = '../trainingData/GlobalLandTemperaturesByCity.csv'
df = pd.read_csv(csv_path)

# Show the (rows, columns) shape, then the number of missing values per column.
print(df.shape)
missing_counts = df.isna().sum()
print(missing_counts)

In [None]:
# Remove every row that contains a missing value, then parse 'dt' into datetimes.
df.dropna(inplace=True)
timestamps = pd.to_datetime(df['dt'])
df['dt'] = timestamps

# Derive calendar features from the parsed dates: each new column is filled
# from the `.dt` accessor attribute paired with it below.
calendar_features = (
    ('day_of_year', 'dayofyear'),
    ('month', 'month'),
    ('year', 'year'),
    ('day_of_week', 'dayofweek'),  # Monday=0, Sunday=6
)
for column_name, dt_attribute in calendar_features:
    df[column_name] = getattr(timestamps.dt, dt_attribute)

df.head()

In [None]:
from sklearn.model_selection import train_test_split

# Thin the data by keeping only every 4th row of df; everything below works
# with reduced_df rather than df.
reduced_df = df.iloc[::4]

# Target: the average temperature. Features: every other column except the
# raw date.
y = reduced_df['AverageTemperature']
X = reduced_df.drop(columns=['dt', 'AverageTemperature'])

# Hold out 20% of the rows for testing (80% for training); the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Print the shape of each piece to verify the split.
shape_report = (
    ("Training set - Features shape:", X_train),
    ("Training set - Target shape:", y_train),
    ("Testing set - Features shape:", X_test),
    ("Testing set - Target shape:", y_test),
)
for label, part in shape_report:
    print(label, part.shape)

In [None]:
# Drop the 'City' and 'Country' columns from both feature sets.
# errors='ignore' keeps this cell re-runnable: X_train/X_test are overwritten
# with their own reduced versions, so on a second run the columns are already
# gone and a plain drop would raise KeyError.
location_columns = ['City', 'Country']
X_train = X_train.drop(columns=location_columns, errors='ignore')
X_test = X_test.drop(columns=location_columns, errors='ignore')

# Re-print the shapes: only the feature column counts should have changed.
print("Training set - Features shape:", X_train.shape)
print("Training set - Target shape:", y_train.shape)
print("Testing set - Features shape:", X_test.shape)
print("Testing set - Target shape:", y_test.shape)

In [None]:
def convert_to_numeric(value):
    """Convert a coordinate such as '57.05N' or '10.33W' to a signed float.

    Args:
        value: Either a number, or a string holding a number optionally
            followed by a single letter. A trailing 'S' or 'W' (in any case)
            makes the result negative; any other trailing letter is dropped
            and the result stays positive. Surrounding whitespace is ignored.

    Returns:
        The value as a float. Float inputs (including NaN) are returned
        unchanged, which makes applying this function twice harmless.

    Raises:
        ValueError: If the numeric part of a string cannot be parsed.
        TypeError: If a non-string value cannot be converted to float.
    """
    if isinstance(value, float):  # Already numeric; this also passes NaN through
        return value
    if not isinstance(value, str):  # Other numerics (e.g. int) have no suffix to strip
        return float(value)

    text = value.strip()
    sign = 1  # Assume positive direction by default
    # Only strip the last character when it really is a letter; a plain
    # numeric string such as '57.05' must keep its final digit.
    if text[-1:].isalpha():
        if text[-1].upper() in ('S', 'W'):
            sign = -1  # Negative direction for South and West
        text = text[:-1]
    return sign * float(text)


# Replace the 'Latitude' and 'Longitude' values with their numeric form, first
# in the training features and then in the testing features.
for _frame in (X_train, X_test):
    for _coordinate in ('Latitude', 'Longitude'):
        _frame[_coordinate] = _frame[_coordinate].apply(convert_to_numeric)


In [None]:
# Name of the column to predict.
target = 'AverageTemperature'

# Names of the columns listed as model inputs.
features = [
    'Latitude',
    'Longitude',
    'day_of_year',
    'month',
    'year',
    'day_of_week',
]

In [None]:
# Select the listed feature columns and the target column from the full df.
# NOTE(review): the cells visible here fit and evaluate the model on
# X_train/X_test (built earlier from reduced_df), not on this X and y, and
# `features` names 6 columns while the model is declared with 7 inputs.
# Confirm whether this reassignment is still needed.
X, y = df[features], df[target]

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Size the first layer from the training data itself rather than a hard-coded
# count, so the model keeps matching its inputs if feature columns are added
# or removed in the preparation cells.
n_features = X_train.shape[1]

# Two hidden ReLU layers followed by a single-unit output for regression.
model = Sequential([
    Dense(64, activation='relu', input_shape=(n_features,), name='InputLayer'),
    Dense(32, activation='relu'),
    Dense(1)
])

# Adam optimizer with mean squared error as the training loss.
model.compile(optimizer='adam', loss='mse')



In [None]:
# Fit for 5 epochs in batches of 32, passing the test split as validation
# data; the object returned by fit is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=32,
)


In [None]:
import numpy as np

# Make predictions on the test set
predictions = model.predict(X_test)

# The model ends in a single-unit layer, so predictions come back as a 2-D
# column, while y_test is a 1-D Series. Subtracting them directly broadcasts
# to an (n, n) result instead of n element-wise errors, so flatten both to
# plain 1-D arrays first (this also sidesteps any index alignment).
residuals = np.ravel(predictions) - y_test.to_numpy()

# Calculate MAE, MSE, and RMSE
mae = np.mean(np.abs(residuals))
mse = np.mean(residuals ** 2)
rmse = np.sqrt(mse)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Square Error (RMSE): {rmse}")