In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Colab-specific setup: mount Google Drive at /content/drive so that files
# stored there (such as the workbook read later via `data_path`) are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Load data

Here is a summary of the dataset in points:

- <font color = "cyan"> Dataset Overview </font>
  - Contains temperature data for **35 European cities**.
  - Cities are divided into:
    - **Capitals**: Rows 1 to 23 (e.g., Amsterdam to Stockholm).
    - **Major cities (non-capitals)**: Rows 24 to 35 (e.g., Antwerp to Zurich).

- <font color = "cyan"> Variables </font>
  - **Quantitative**: 16 variables.
    - Indexed from 1 (January) to 12 (December) for monthly temperatures.
    - Variables 13 (Average Temperature) to 16 (Longitude) are supplementary.
  - **Qualitative**: 1 variable (`Région`), indicating the geographic location of the city.

- <font color = "cyan"> Scope of the Exercise </font>:
  - Focuses only on **European capitals** (23 rows).
  - Analyzes variables **1 to 12** (monthly temperatures).

- <font color = "cyan">Filtered Dataset for this Study </font>
  - Rows: **23** (European capitals only).
  - Columns: **12** (Monthly temperatures).

In [3]:
# Location of the temperature workbook on the mounted Google Drive.
data_path = "/content/drive/MyDrive/e-business & Big Data/Machine Learning/Mini Project/data/temperature.xlsx"

# Read the whole sheet, then keep only its first 23 rows
# (the notebook's description identifies these as the European capitals).
n_rows_kept = 23
full_table = pd.read_excel(data_path)
data = full_table[:n_rows_kept]
data

Unnamed: 0,Ville,Janvier,Février,Mars,Avril,Mai,Juin,Juillet,Août,Septembre,Octobre,Novembre,Décembre,Moyenne,Amplitude,Latitude,Longitude,Région
0,Amsterdam,2.9,2.5,5.7,8.2,12.5,14.8,17.1,17.1,14.5,11.4,7.0,4.4,9.9,14.6,52.2,4.5,Ouest
1,Athènes,9.1,9.7,11.7,15.4,20.1,24.5,27.4,27.2,23.8,19.2,14.6,11.0,17.8,18.3,37.6,23.5,Sud
2,Berlin,-0.2,0.1,4.4,8.2,13.8,16.0,18.3,18.0,14.4,10.0,4.2,1.2,9.1,18.5,52.3,13.2,Ouest
3,Bruxelles,3.3,3.3,6.7,8.9,12.8,15.6,17.8,17.8,15.0,11.1,6.7,4.4,10.3,14.4,50.5,4.2,Ouest
4,Budapest,-1.1,0.8,5.5,11.6,17.0,20.2,22.0,21.3,16.9,11.3,5.1,0.7,10.9,23.1,47.3,19.0,Est
5,Copenhague,-0.4,-0.4,1.3,5.8,11.1,15.4,17.1,16.6,13.3,8.8,4.1,1.3,7.8,17.5,55.4,12.3,Nord
6,Dublin,4.8,5.0,5.9,7.8,10.4,13.3,15.0,14.6,12.7,9.7,6.7,5.4,9.3,10.2,53.2,6.1,Nord
7,Helsinki,-5.8,-6.2,-2.7,3.1,10.2,14.0,17.2,14.9,9.7,5.2,0.1,-2.3,4.8,23.4,60.1,25.0,Nord
8,Kiev,-5.9,-5.0,-0.3,7.4,14.3,17.8,19.4,18.5,13.7,7.5,1.2,-3.6,7.1,25.3,50.3,30.3,Est
9,Cracovie,-3.7,-2.0,1.9,7.9,13.2,16.9,18.4,17.6,13.7,8.6,2.6,-1.7,7.7,22.1,50.0,19.6,Est


In [4]:
# Build the matrix of monthly temperatures: skip the first column of `data`
# and its last five columns, keeping the twelve columns in between.
#
# `data.values` on a frame that mixes text and numeric columns yields an
# object-dtype array, and slicing it keeps that dtype; NumPy ufuncs then fail
# or run slowly on it. Selecting the columns with `.iloc` first and converting
# explicitly gives a proper float64 array with the same values.
X = data.iloc[:, 1:-5].to_numpy(dtype=float)
X

array([[2.9, 2.5, 5.7, 8.2, 12.5, 14.8, 17.1, 17.1, 14.5, 11.4, 7.0, 4.4],
       [9.1, 9.7, 11.7, 15.4, 20.1, 24.5, 27.4, 27.2, 23.8, 19.2, 14.6,
        11.0],
       [-0.2, 0.1, 4.4, 8.2, 13.8, 16.0, 18.3, 18.0, 14.4, 10.0, 4.2,
        1.2],
       [3.3, 3.3, 6.7, 8.9, 12.8, 15.6, 17.8, 17.8, 15.0, 11.1, 6.7, 4.4],
       [-1.1, 0.8, 5.5, 11.6, 17.0, 20.2, 22.0, 21.3, 16.9, 11.3, 5.1,
        0.7],
       [-0.4, -0.4, 1.3, 5.8, 11.1, 15.4, 17.1, 16.6, 13.3, 8.8, 4.1,
        1.3],
       [4.8, 5.0, 5.9, 7.8, 10.4, 13.3, 15.0, 14.6, 12.7, 9.7, 6.7, 5.4],
       [-5.8, -6.2, -2.7, 3.1, 10.2, 14.0, 17.2, 14.9, 9.7, 5.2, 0.1,
        -2.3],
       [-5.9, -5.0, -0.3, 7.4, 14.3, 17.8, 19.4, 18.5, 13.7, 7.5, 1.2,
        -3.6],
       [-3.7, -2.0, 1.9, 7.9, 13.2, 16.9, 18.4, 17.6, 13.7, 8.6, 2.6,
        -1.7],
       [10.5, 11.3, 12.8, 14.5, 16.7, 19.4, 21.5, 21.9, 20.4, 17.4, 13.7,
        11.1],
       [3.4, 4.2, 5.5, 8.3, 11.9, 15.1, 16.9, 16.5, 14.0, 10.2, 6.3, 4.4],
       [5.0, 6.6