In [1]:
pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp312-cp312-win_amd64.whl.metadata (15 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.6.1-cp312-cp312-win_amd64.whl (11.1 MB)
   ---------------------------------------- 0.0/11.1 MB ? eta -:--:--
   - -------------------------------------- 0.5/11.1 MB 3.4 MB/s eta 0:00:04
   ------ --------------------------------- 1.8/11.1 MB 5.0 MB/s eta 0:00:02
   ------------ --------------------------- 3.4/11.1 MB 5.8 MB/s eta 0:00:02
   ------------------- -------------------- 5.5/11.1 MB 7.1 MB/s eta 0:00:01
   ------------------------------ --------- 8.4/11.1 MB 8.4 MB/s eta 0:00:01
   ---------------------------------------  11.0/11.1 MB 9.4 MB/s eta 0:00:01
   ---------------------------------------- 11.1/11.1 MB 9.0 MB/s eta 0:0


[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
from sklearn.linear_model import LinearRegression 
from sklearn.metrics import mean_absolute_error, mean_squared_error 
from sklearn.model_selection import train_test_split 

 

# Load the dataset; 'Tidspunkt' is parsed as datetimes so the .dt accessor works below.
df = pd.read_csv("trondheim_vaerdata_full_1.csv", parse_dates=['Tidspunkt'])
print(df.columns)

# Derive the calendar features used by the model and fill temperature gaps.
# NOTE(review): interpolate() fills by row position, so this presumes the rows are
# already in chronological order — confirm against the CSV.
tidspunkt = df['Tidspunkt'].dt
df = (
    df.assign(year=tidspunkt.year, day_of_year=tidspunkt.dayofyear)
      .assign(**{'Temperatur (°C)': lambda frame: frame['Temperatur (°C)'].interpolate()})
      # Rows that still lack a temperature after interpolation are dropped.
      .dropna(subset=['Temperatur (°C)'])
)

 

# Visualisation 1: line chart of temperature over time.
# NOTE(review): the date range in the title is hard-coded — confirm it matches the data.
fig, ax = plt.subplots(figsize=(12, 4))
sns.lineplot(data=df, x='Tidspunkt', y='Temperatur (°C)', ax=ax)
ax.set_title("Daglig gjennomsnittstemperatur (16.01.2025 - 16.03.2025)")
ax.set_xlabel("Dato")
ax.set_ylabel("Temperatur (°C)")
fig.tight_layout()
plt.show()

 

# Visualisation 2: bar chart of total precipitation per calendar month.
# 'MS' (month start) groups rows into the same calendar months as the 'M' alias,
# but is accepted by every pandas version; 'M' is deprecated since pandas 2.2.
# The only difference is that each bin is labelled with the first day of the month.
monthly_precip = df.resample('MS', on='Tidspunkt').sum(numeric_only=True)

fig, ax = plt.subplots(figsize=(10, 4))
monthly_precip['Nedbør (mm)'].plot(kind='bar', ax=ax)
# Show compact "YYYY-MM" tick labels instead of full timestamps.
ax.set_xticklabels(monthly_precip.index.strftime('%Y-%m'), rotation=45, ha='right')
ax.set_title("Månedlig nedbør (mm)")
ax.set_ylabel("Nedbør (mm)")
fig.tight_layout()
plt.show()

 

# Visualisation 3: scatter plot of temperature against precipitation.
fig, ax = plt.subplots(figsize=(8, 5))
# Semi-transparent markers keep overlapping points distinguishable.
sns.scatterplot(data=df, x='Nedbør (mm)', y='Temperatur (°C)', alpha=0.5, ax=ax)
ax.set_title("Temperatur vs. nedbør")
ax.set_xlabel("Nedbør (mm)")
ax.set_ylabel("Temperatur (°C)")
fig.tight_layout()
plt.show()

 

# ---- Model training ----

# Feature matrix (calendar year and day of year) and regression target.
features = df[['year', 'day_of_year']]
target = df['Temperatur (°C)']

# Chronological hold-out: fit on every year before TEST_YEAR, evaluate on TEST_YEAR.
TEST_YEAR = 2025
train_mask = df['year'] < TEST_YEAR
test_mask = df['year'] == TEST_YEAR

# Fail early with a readable message instead of an opaque estimator error
# when the dataset does not cover both sides of the split.
if not train_mask.any():
    raise ValueError(
        f"No rows before {TEST_YEAR} in the dataset; nothing to train on."
    )
if not test_mask.any():
    raise ValueError(
        f"No rows from {TEST_YEAR} in the dataset; nothing to evaluate on."
    )

X_train, y_train = features[train_mask], target[train_mask]
X_test, y_test = features[test_mask], target[test_mask]

 

# Fit an ordinary least-squares model on the training split
# (fit() returns the estimator itself, so the call can be chained).
model = LinearRegression().fit(X_train, y_train)

 

# Evaluate the model on the test split.
y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
# RMSE is the square root of the mean squared error, in the target's own units.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print("MAE:", mae)
print("RMSE:", rmse)

 

# ---- Prediction for 2026 ----

# One feature row per calendar day of the forecast year. The days are derived
# from the calendar instead of a hard-coded count; 2026 is a common (non-leap)
# year, so this produces 365 rows.
FORECAST_YEAR = 2026
forecast_days = pd.date_range(
    start=f"{FORECAST_YEAR}-01-01", end=f"{FORECAST_YEAR}-12-31", freq="D"
)
future_2026 = pd.DataFrame({
    'year': forecast_days.year,
    'day_of_year': forecast_days.dayofyear,
})

# Pass exactly the columns the model was trained on, in the same order.
future_2026['predicted_temperature'] = model.predict(
    future_2026[['year', 'day_of_year']]
)

 

# Plot the predicted temperature against the day of the year.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(future_2026['day_of_year'], future_2026['predicted_temperature'])
ax.set_title("Forventet daglig temperatur i 2026 (lineær regresjon)")
ax.set_xlabel("Dag i året")
ax.set_ylabel("Predikert temperatur (°C)")
fig.tight_layout()
plt.show()

FileNotFoundError: [Errno 2] No such file or directory: 'trondheim_vaerdata_full_1.csv'