# Import Modules

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import logging
from sklearn.svm import SVR
from sklearn.linear_model import Ridge, Lasso
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [3]:


# Define the path
data_path = '/content/drive/MyDrive/weatherStation/daily'



# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Define column name configurations
column_configurations = {
    'config1': [
        'Time', 'Wind Dir', 'Wind Spd', 'Wind Gust', 'Hum In', 'Humidity', 'Temp In', 'Temp', 'Raw Barom',
        'UV', 'Solar', 'Dew Point', 'DailyRain', 'RainRate'
    ],
    'config2': [
        'Time', 'Wind Dir', 'Wind Spd', 'Hum In', 'Humidity', 'Temp In', 'Temp', 'Raw Barom', 'Temp Ch 1',
        'Hum Ch 1', 'Temp Ch 2', 'Hum Ch 2', 'UV', 'Solar', 'Dew Point', 'RainRate'
    ],
    'config3': [
        'Time', 'Wind Dir', 'Wind Spd', 'Wind Gust', 'Hum In', 'Humidity', 'Temp In', 'Temp', 'Raw Barom',
        'UV', 'Solar', 'Dew Point', 'DailyRain', 'RainRate'
    ],
    'config4': [
        'Time', 'Wind Dir', 'Wind Spd', 'Hum In', 'Humidity', 'Temp In', 'Temp', 'Raw Barom', 'UV', 'Solar',
        'Dew Point', 'DailyRain', 'RainRate'
    ]
}

# Initialize an empty list to store all dataframes
dfs = []

# Iterate through each file in the directory
for filename in os.listdir(data_path):
    if filename.endswith(".txt"):
        file_path = os.path.join(data_path, filename)

        try:
            # Read the first line to get the date
            with open(file_path, 'r', encoding='latin1') as f:
                first_line = f.readline().strip()
                # Skip the second line containing units
                f.readline()

            # Extract the date from the first line
            try:
                file_date_from_line = pd.to_datetime(first_line, format='%d/%m/%y').date()
            except ValueError:
                logging.warning(f"Date format in file '{filename}' is not recognized. Skipping.")
                continue

            # Read the remaining data into a DataFrame with a placeholder for column names
            df = pd.read_csv(file_path, delimiter='\s+', skiprows=3, encoding='latin1', header=None, on_bad_lines='skip')

            # Check which column configuration matches the DataFrame
            for config_name, columns in column_configurations.items():
                if df.shape[1] == len(columns):
                    df.columns = columns
                    df['Date'] = file_date_from_line
                    dfs.append(df)
                    logging.info(f"File '{filename}' processed with column configuration '{config_name}'.")
                    break
            else:
                logging.warning(f"File '{filename}' does not match any known column configuration. Skipping.")

        except (UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError, ValueError) as e:
            logging.error(f"Could not read {file_path}: {e}")

# Concatenate all dataframes into a single dataframe, ignoring empty or all-NA entries
if dfs:
    weather_data = pd.concat(dfs, ignore_index=True)
    logging.info(f"Successfully concatenated data into a single DataFrame with {weather_data.shape[0]} rows.")
else:
    logging.warning("No dataframes to concatenate.")

ERROR:root:Could not read /content/drive/MyDrive/weatherStation/daily/120825.txt: No columns to parse from file


In [4]:
# weather_data.to_csv('/content/drive/MyDrive/weatherStation/weather_data.csv')

In [5]:
weather_data = pd.read_csv('/content/drive/MyDrive/weatherStation/weather_data.csv')

  weather_data = pd.read_csv('/content/drive/MyDrive/weatherStation/weather_data.csv')


In [6]:
weather_data

Unnamed: 0.1,Unnamed: 0,Time,Wind Dir,Wind Spd,Wind Gust,Hum In,Humidity,Temp In,Temp,Raw Barom,UV,Solar,Dew Point,DailyRain,RainRate,Date,Temp Ch 1,Hum Ch 1,Temp Ch 2,Hum Ch 2
0,0,0:00,283.0,3.2,14.5,57.0,84.0,13.9,9.3,997.4,0.0,0,6.8,0.2,0.0,2022-04-06,,,,
1,1,0:01,206.0,8.0,16.1,57.0,84.0,13.9,9.3,997.4,0.0,0,6.8,0.0,0.0,2022-04-06,,,,
2,2,0:02,222.0,6.4,16.1,57.0,84.0,13.8,9.3,997.4,0.0,0,6.8,0.0,0.0,2022-04-06,,,,
3,3,0:03,220.0,11.3,19.3,57.0,84.0,13.8,9.3,997.4,0.0,0,6.8,0.0,0.0,2022-04-06,,,,
4,4,0:04,231.0,11.3,19.3,57.0,84.0,13.8,9.4,997.3,0.0,0,6.8,0.0,0.0,2022-04-06,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6512403,6512403,23:55,327.0,1.6,,55,78,20.3,13.2,1018.9,0.0,0,9.4,,0.0,2012-09-04,0.0,0.0,0.0,0.0
6512404,6512404,23:56,327.0,1.6,,55,78,20.3,13.2,1018.9,0.0,0,9.4,,0.0,2012-09-04,0.0,0.0,0.0,0.0
6512405,6512405,23:57,327.0,1.6,,55,78,20.3,13.2,1018.9,0.0,0,9.4,,0.0,2012-09-04,0.0,0.0,0.0,0.0
6512406,6512406,23:58,327.0,1.6,,55,78,20.3,13.2,1019.0,0.0,0,9.4,,0.0,2012-09-04,0.0,0.0,0.0,0.0
