# Obiettivo
Modificare la disposizione dei dati senza modificarne il significato e ricondurli ad una forma utile ad ulteriori analisi ed elaborazioni.
### Colonne

| Nome              | Tipo      | Descrizione                                                            |
|:------------------|:----------|:-----------------------------------------------------------------------|
| Post              | int       | Identificativo del sensore                                             |
| Run               | int       | Identificativo di un intervallo di misura                              |
| Timestamp         | Timestamp | Giorno e ora della Run                                                 |
| Degrees           | float     | Distanza in gradi tra un sensore ed il successivo                      |
| Azimuth           | float     | Angolo di azimut della posizione del sensore                           |
| Wind-Dir          | float     | Distribuzione di frequenza del vento                                   |
| Concentration     | float     | Concentrazione in mg/m³                                                |
| Distance          | float     | Distanza dalla sorgente in m                                           |
| U                 | float     | Velocità orizzontale del vento a 2 m di altezza dal suolo in m/s       |
| Se                | float     | Deviazione standard dell'angolo orizzontale del vento                  |
| Sa                | float     | Deviazione standard dell'angolo verticale del vento                    |
| U1.5              | float     | Velocità orizzontale del vento a 1.5 m di altezza dal suolo in m/s     |
| U3.0              | float     | Velocità orizzontale del vento a 3 m di altezza dal suolo in m/s       |
| U6.0              | float     | Velocità orizzontale del vento a 6 m di altezza dal suolo in m/s       |
| U12.0             | float     | Velocità orizzontale del vento a 12 m di altezza dal suolo in m/s      |
| T1.5              | float     | Temperatura dell'aria a 1.5 m di altezza dal suolo in gradi centigradi |
| T3.0              | float     | Temperatura dell'aria a 3 m di altezza dal suolo in gradi centigradi   |
| T6.0              | float     | Temperatura dell'aria a 6 m di altezza dal suolo in gradi centigradi   |
| T12.0             | float     | Temperatura dell'aria a 12 m di altezza dal suolo in gradi centigradi  |
| Correction Factor | float     | Fattore di correzione moltiplicativo da 0 a 1                          |

In [1]:
import pandas as pd

def parse_data(
        data: pd.DataFrame,
        columns,
        concentration_columns,
        concentration_distances,
        to_remove = None
):
    """Reshape a raw measurement sheet into one row per (sensor, distance).

    The sheet is expected to interleave "date" rows (day, month name, year,
    ``'Run N'``, ``'HHMM-HHMM'`` in the first five remaining columns) with
    numeric measurement rows. Each measurement row is tagged with the
    timestamp and run number of the most recent date row that precedes it,
    then expanded into one output row per concentration column.

    Args:
        data: Raw sheet. It is not modified; the function works on a copy.
        columns: Names assigned to the first ``len(columns)`` columns of
            ``data``; any further columns are discarded.
        concentration_columns: Names (taken from ``columns``) of the columns
            holding concentration values.
        concentration_distances: One distance per entry of
            ``concentration_columns``, paired positionally with ``zip``.
        to_remove: Optional column names to drop right after renaming.
            Defaults to dropping nothing.

    Returns:
        A DataFrame with the non-concentration columns plus ``timestamp``,
        ``run``, ``concentration`` and ``distance``. The index repeats the
        label of the originating measurement row once per distance. When no
        measurement row survives the filtering, an empty DataFrame with
        those columns is returned.
    """
    # A None default avoids sharing one list object between calls.
    to_remove = [] if to_remove is None else list(to_remove)

    # Rename columns and drop irrelevant columns. Copy explicitly so the
    # later assignments never act on a view of the caller's frame.
    data = data.iloc[:, 0:len(columns)].copy()
    data.columns = columns
    data = data.drop(columns=to_remove)

    # Remove rows whose first column is null
    data = data[data[columns[0]].notna()].copy()

    months = ['january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october', 'november', 'december']

    # Set timestamps and runs. The values are appended *before* the date-row
    # check, so a date row itself still carries the previous run's values and
    # only the rows after it receive the new ones.
    runs = []
    timestamps = []
    current_timestamp = None
    current_run = None
    for _, r in data.iterrows():
        timestamps.append(current_timestamp)
        runs.append(current_run)
        month_name = str(r.iloc[1]).strip().lower()
        if month_name in months:
            # Start time is the 'HHMM' part before the dash
            start_time = r.iloc[4].split('-')[0]
            current_timestamp = pd.Timestamp(
                day=int(r.iloc[0]),
                month=months.index(month_name)+1,
                year=int(r.iloc[2]),
                hour=int(start_time[0:2]),
                minute=int(start_time[2:4])
            )
            current_run = int(r.iloc[3].strip().lower().replace('run ', ''))
    data['timestamp'] = timestamps
    data['run'] = runs

    # Remove initial header values: keep everything from the row just before
    # the first 'post' header onwards.
    data = data.reset_index(drop=True)
    first_index = 0
    for i, r in data.iterrows():
        if str(r[columns[0]]).strip().lower() == 'post':
            # Clamp at 0: a header on the very first row used to give -1,
            # which made iloc keep only the last row of the sheet.
            first_index = max(i - 1, 0)
            break

    data = data.iloc[first_index:, :].reset_index(drop=True)

    # Remove non-numeric values (header and date rows)
    non_numeric = [
        i for i, r in data.iterrows()
        if not isinstance(r[columns[0]], int) or not isinstance(r[columns[1]], (float, int))
    ]
    data = data.drop(non_numeric)

    # Flatten concentration values: one output row per (row, distance)
    rows = []
    for _, r in data.iterrows():
        for col, d in zip(concentration_columns, concentration_distances):
            new_row = r.copy(deep=True)
            new_row['concentration'] = r[col]
            new_row['distance'] = d
            rows.append(new_row)

    if not rows:
        # pd.DataFrame([]) has no columns, so build the empty result
        # explicitly instead of failing on the drop below.
        kept = [c for c in data.columns if c not in concentration_columns]
        added = [c for c in ('concentration', 'distance') if c not in kept]
        return pd.DataFrame(columns=kept + added)

    data = pd.DataFrame(rows)

    # Remove the now-duplicated wide concentration columns
    data = data.drop(columns=concentration_columns)

    return data

In [2]:
def add_additional_data(data: pd.DataFrame, additional_data: pd.DataFrame, columns):
    """Attach per-run values from an auxiliary sheet to every row of ``data``.

    The first column of ``additional_data`` is discarded and the remaining
    ones are renamed to ``columns``. Rows where ``columns[0]`` is not an
    ``int`` or ``columns[1]`` is not a number are dropped, and the surviving
    rows are renumbered from 0. For each row of ``data`` the values are taken
    from the auxiliary row at position ``int(row['run']) - 1``.

    NOTE(review): the lookup is positional. It assumes the cleaned auxiliary
    sheet lists runs 1..N in order and does not compare against the values
    in its own ``columns[0]`` column. Verify against the source workbooks.

    Args:
        data: Frame with a ``run`` column. It is modified in place: one
            column is added (or overwritten) for each name in
            ``columns[1:]``.
        additional_data: Auxiliary sheet. It is left untouched.
        columns: Names for the auxiliary columns after the first one is
            dropped. ``columns[0]`` is only used for filtering, and
            ``columns[1:]`` are copied into ``data``.

    Returns:
        The same ``data`` object, with the extra columns added.

    Raises:
        KeyError: If a run number points past the cleaned auxiliary rows.
    """
    # Clean a private copy: mutating the caller's sheet in place would strip
    # another column every time the function is re-run on the same frame.
    additional = additional_data.drop(columns=[additional_data.columns[0]])
    additional.columns = columns

    # Remove non-numeric values (header / unit rows), then renumber so that
    # index labels equal row positions.
    non_numeric = [
        i for i, r in additional.iterrows()
        if not isinstance(r[columns[0]], int) or not isinstance(r[columns[1]], (float, int))
    ]
    additional = additional.drop(non_numeric).reset_index(drop=True)

    # Row label in the auxiliary frame for each row of data (run N -> N - 1)
    positions = [int(run) - 1 for run in data['run']] if len(data) else []

    # One label-based lookup per column; .loc raises KeyError on an unknown
    # run instead of silently wrapping around. tolist() hands pandas plain
    # scalars so the new column's dtype is inferred from the values.
    for c in columns[1:]:
        data[c] = additional.loc[positions, c].tolist()

    return data

In [3]:
from util import load_from_excel, apply_and_save, save_to_csv

# Source workbooks, one per dataset.
fname1 = 'RHILL1Update01.xls'
fname2 = 'RHILL2Update01.xls'

# Sheets 0, 1 and 2 of each workbook are bound to the correction, meteo and
# measurement frames respectively, loaded in that order.
correction1, meteo1, data1 = (load_from_excel(fname1, sheet_name=sheet) for sheet in (0, 1, 2))
correction2, meteo2, data2 = (load_from_excel(fname2, sheet_name=sheet) for sheet in (0, 1, 2))

# Leading columns shared by both measurement sheets; the concentration
# columns that follow are named differently per dataset.
base_columns = ['post', 'degrees', 'azimuth', 'wind_direction']
concentration_columns1 = ['concentration_50', 'concentration_100', 'concentration_200']
concentration_columns2 = ['concentration_50_10', 'concentration_100_10', 'concentration_200_10']

# apply_and_save comes from the project's util module (not shown here); it is
# handed parse_data, a CSV file name and parse_data's keyword arguments.
parsed1 = apply_and_save(
    data1, parse_data, 'data1_parsed.csv',
    columns=base_columns + concentration_columns1,
    concentration_columns=concentration_columns1,
    concentration_distances=[50, 100, 200]
)

parsed2 = apply_and_save(
    data2, parse_data, 'data2_parsed.csv',
    columns=base_columns + concentration_columns2,
    concentration_columns=concentration_columns2,
    concentration_distances=[50, 100, 200],
    to_remove = []
)

# Column names for the meteo sheets. The first dataset has an extra 'Se'
# column that the second one lacks.
meteo_columns1 = ['run', 'U', 'Se', 'Sa', 'U1.5', 'U3.0', 'U6.0', 'U12.0', 'T1.5', 'T3.0', 'T6.0', 'T12.0']
meteo_columns2 = ['run', 'U', 'Sa', 'U1.5', 'U3.0', 'U6.0', 'U12.0', 'T1.5', 'T3.0', 'T6.0', 'T12.0']

# Column names for the correction sheets.
correction_columns1 = ['run', 'q', 'correction_50', 'correction_100', 'correction_200']
correction_columns2 = [
    'run',
    'source_strength_10min', 'source_strength_3min', 'source_strength_1.5min',
    'correction_10min', 'correction_3min', 'correction_1.5min',
]

# add_additional_data assigns the new columns onto the frame it receives and
# returns that same frame, so parsedN, parsedN_meteo and
# parsedN_meteo_corrected all refer to one object.
parsed1_meteo = add_additional_data(parsed1, meteo1, columns=meteo_columns1)
parsed1_meteo_corrected = add_additional_data(parsed1_meteo, correction1, columns=correction_columns1)

parsed2_meteo = add_additional_data(parsed2, meteo2, columns=meteo_columns2)
parsed2_meteo_corrected = add_additional_data(parsed2_meteo, correction2, columns=correction_columns2)

In [4]:
# Display parsed1. add_additional_data assigned the meteo and correction
# columns onto this same frame, which is why they already appear below.
parsed1

Unnamed: 0,post,degrees,azimuth,wind_direction,timestamp,run,concentration,distance,U,Se,...,U6.0,U12.0,T1.5,T3.0,T6.0,T12.0,q,correction_50,correction_100,correction_200
3,7,18,43,,1954-08-13 09:15:00,1.0,,50,2.17,11.2,...,-99.00,-99.00,-99.0,-99.00,-99.00,-99.00,1.00,0.93,0.93,0.92
3,7,18,43,,1954-08-13 09:15:00,1.0,,100,2.17,11.2,...,-99.00,-99.00,-99.0,-99.00,-99.00,-99.00,1.00,0.93,0.93,0.92
3,7,18,43,,1954-08-13 09:15:00,1.0,,200,2.17,11.2,...,-99.00,-99.00,-99.0,-99.00,-99.00,-99.00,1.00,0.93,0.93,0.92
4,8,21,46,,1954-08-13 09:15:00,1.0,,50,2.17,11.2,...,-99.00,-99.00,-99.0,-99.00,-99.00,-99.00,1.00,0.93,0.93,0.92
4,8,21,46,,1954-08-13 09:15:00,1.0,,100,2.17,11.2,...,-99.00,-99.00,-99.0,-99.00,-99.00,-99.00,1.00,0.93,0.93,0.92
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1016,30,87,112,0.17,1955-11-08 23:10:00,29.0,,100,1.78,3.2,...,2.62,3.11,2.6,3.02,3.29,3.44,7.96,0.96,0.96,0.97
1016,30,87,112,0.17,1955-11-08 23:10:00,29.0,,200,1.78,3.2,...,2.62,3.11,2.6,3.02,3.29,3.44,7.96,0.96,0.96,0.97
1017,31,90,115,,1955-11-08 23:10:00,29.0,,50,1.78,3.2,...,2.62,3.11,2.6,3.02,3.29,3.44,7.96,0.96,0.96,0.97
1017,31,90,115,,1955-11-08 23:10:00,29.0,,100,1.78,3.2,...,2.62,3.11,2.6,3.02,3.29,3.44,7.96,0.96,0.96,0.97


In [5]:
# Display parsed2. add_additional_data assigned the meteo and correction
# columns onto this same frame, which is why they already appear below.
parsed2

Unnamed: 0,post,degrees,azimuth,wind_direction,timestamp,run,concentration,distance,U,Sa,...,T1.5,T3.0,T6.0,T12.0,source_strength_10min,source_strength_3min,source_strength_1.5min,correction_10min,correction_3min,correction_1.5min
3,37,1.5,76.5,,1957-09-24 19:35:00,1.0,,50,2.14,8.4,...,12.12,12.51,12.73,12.94,45.1,45.1,45.1,0.90,0.92,0.83
3,37,1.5,76.5,,1957-09-24 19:35:00,1.0,,100,2.14,8.4,...,12.12,12.51,12.73,12.94,45.1,45.1,45.1,0.90,0.92,0.83
3,37,1.5,76.5,,1957-09-24 19:35:00,1.0,,200,2.14,8.4,...,12.12,12.51,12.73,12.94,45.1,45.1,45.1,0.90,0.92,0.83
4,38,1.5,78.0,0.42,1957-09-24 19:35:00,1.0,,50,2.14,8.4,...,12.12,12.51,12.73,12.94,45.1,45.1,45.1,0.90,0.92,0.83
4,38,1.5,78.0,0.42,1957-09-24 19:35:00,1.0,,100,2.14,8.4,...,12.12,12.51,12.73,12.94,45.1,45.1,45.1,0.90,0.92,0.83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
604,78,1.5,138.0,0.42,1957-12-03 11:20:00,10.0,,100,4.07,20.7,...,3.75,3.37,3.16,2.93,93.4,97.5,97.5,0.94,0.96,0.97
604,78,1.5,138.0,0.42,1957-12-03 11:20:00,10.0,,200,4.07,20.7,...,3.75,3.37,3.16,2.93,93.4,97.5,97.5,0.94,0.96,0.97
605,79,1.5,139.5,,1957-12-03 11:20:00,10.0,,50,4.07,20.7,...,3.75,3.37,3.16,2.93,93.4,97.5,97.5,0.94,0.96,0.97
605,79,1.5,139.5,,1957-12-03 11:20:00,10.0,,100,4.07,20.7,...,3.75,3.37,3.16,2.93,93.4,97.5,97.5,0.94,0.96,0.97


In [6]:
# Display the first dataset after the meteo and correction columns were added.
parsed1_meteo_corrected

Unnamed: 0,post,degrees,azimuth,wind_direction,timestamp,run,concentration,distance,U,Se,...,U6.0,U12.0,T1.5,T3.0,T6.0,T12.0,q,correction_50,correction_100,correction_200
3,7,18,43,,1954-08-13 09:15:00,1.0,,50,2.17,11.2,...,-99.00,-99.00,-99.0,-99.00,-99.00,-99.00,1.00,0.93,0.93,0.92
3,7,18,43,,1954-08-13 09:15:00,1.0,,100,2.17,11.2,...,-99.00,-99.00,-99.0,-99.00,-99.00,-99.00,1.00,0.93,0.93,0.92
3,7,18,43,,1954-08-13 09:15:00,1.0,,200,2.17,11.2,...,-99.00,-99.00,-99.0,-99.00,-99.00,-99.00,1.00,0.93,0.93,0.92
4,8,21,46,,1954-08-13 09:15:00,1.0,,50,2.17,11.2,...,-99.00,-99.00,-99.0,-99.00,-99.00,-99.00,1.00,0.93,0.93,0.92
4,8,21,46,,1954-08-13 09:15:00,1.0,,100,2.17,11.2,...,-99.00,-99.00,-99.0,-99.00,-99.00,-99.00,1.00,0.93,0.93,0.92
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1016,30,87,112,0.17,1955-11-08 23:10:00,29.0,,100,1.78,3.2,...,2.62,3.11,2.6,3.02,3.29,3.44,7.96,0.96,0.96,0.97
1016,30,87,112,0.17,1955-11-08 23:10:00,29.0,,200,1.78,3.2,...,2.62,3.11,2.6,3.02,3.29,3.44,7.96,0.96,0.96,0.97
1017,31,90,115,,1955-11-08 23:10:00,29.0,,50,1.78,3.2,...,2.62,3.11,2.6,3.02,3.29,3.44,7.96,0.96,0.96,0.97
1017,31,90,115,,1955-11-08 23:10:00,29.0,,100,1.78,3.2,...,2.62,3.11,2.6,3.02,3.29,3.44,7.96,0.96,0.96,0.97


In [7]:
# Display the second dataset after the meteo and correction columns were added.
parsed2_meteo_corrected

Unnamed: 0,post,degrees,azimuth,wind_direction,timestamp,run,concentration,distance,U,Sa,...,T1.5,T3.0,T6.0,T12.0,source_strength_10min,source_strength_3min,source_strength_1.5min,correction_10min,correction_3min,correction_1.5min
3,37,1.5,76.5,,1957-09-24 19:35:00,1.0,,50,2.14,8.4,...,12.12,12.51,12.73,12.94,45.1,45.1,45.1,0.90,0.92,0.83
3,37,1.5,76.5,,1957-09-24 19:35:00,1.0,,100,2.14,8.4,...,12.12,12.51,12.73,12.94,45.1,45.1,45.1,0.90,0.92,0.83
3,37,1.5,76.5,,1957-09-24 19:35:00,1.0,,200,2.14,8.4,...,12.12,12.51,12.73,12.94,45.1,45.1,45.1,0.90,0.92,0.83
4,38,1.5,78.0,0.42,1957-09-24 19:35:00,1.0,,50,2.14,8.4,...,12.12,12.51,12.73,12.94,45.1,45.1,45.1,0.90,0.92,0.83
4,38,1.5,78.0,0.42,1957-09-24 19:35:00,1.0,,100,2.14,8.4,...,12.12,12.51,12.73,12.94,45.1,45.1,45.1,0.90,0.92,0.83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
604,78,1.5,138.0,0.42,1957-12-03 11:20:00,10.0,,100,4.07,20.7,...,3.75,3.37,3.16,2.93,93.4,97.5,97.5,0.94,0.96,0.97
604,78,1.5,138.0,0.42,1957-12-03 11:20:00,10.0,,200,4.07,20.7,...,3.75,3.37,3.16,2.93,93.4,97.5,97.5,0.94,0.96,0.97
605,79,1.5,139.5,,1957-12-03 11:20:00,10.0,,50,4.07,20.7,...,3.75,3.37,3.16,2.93,93.4,97.5,97.5,0.94,0.96,0.97
605,79,1.5,139.5,,1957-12-03 11:20:00,10.0,,100,4.07,20.7,...,3.75,3.37,3.16,2.93,93.4,97.5,97.5,0.94,0.96,0.97


In [8]:
# Write both merged frames to CSV through the project's save_to_csv helper
# (imported from util; its implementation is not shown in this notebook).
save_to_csv(parsed1_meteo_corrected, 'full_parsed1.csv')
save_to_csv(parsed2_meteo_corrected, 'full_parsed2.csv')

In [9]:
# Display the first merged dataset again, after it has been saved.
parsed1_meteo_corrected

Unnamed: 0,post,degrees,azimuth,wind_direction,timestamp,run,concentration,distance,U,Se,...,U6.0,U12.0,T1.5,T3.0,T6.0,T12.0,q,correction_50,correction_100,correction_200
3,7,18,43,,1954-08-13 09:15:00,1.0,,50,2.17,11.2,...,-99.00,-99.00,-99.0,-99.00,-99.00,-99.00,1.00,0.93,0.93,0.92
3,7,18,43,,1954-08-13 09:15:00,1.0,,100,2.17,11.2,...,-99.00,-99.00,-99.0,-99.00,-99.00,-99.00,1.00,0.93,0.93,0.92
3,7,18,43,,1954-08-13 09:15:00,1.0,,200,2.17,11.2,...,-99.00,-99.00,-99.0,-99.00,-99.00,-99.00,1.00,0.93,0.93,0.92
4,8,21,46,,1954-08-13 09:15:00,1.0,,50,2.17,11.2,...,-99.00,-99.00,-99.0,-99.00,-99.00,-99.00,1.00,0.93,0.93,0.92
4,8,21,46,,1954-08-13 09:15:00,1.0,,100,2.17,11.2,...,-99.00,-99.00,-99.0,-99.00,-99.00,-99.00,1.00,0.93,0.93,0.92
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1016,30,87,112,0.17,1955-11-08 23:10:00,29.0,,100,1.78,3.2,...,2.62,3.11,2.6,3.02,3.29,3.44,7.96,0.96,0.96,0.97
1016,30,87,112,0.17,1955-11-08 23:10:00,29.0,,200,1.78,3.2,...,2.62,3.11,2.6,3.02,3.29,3.44,7.96,0.96,0.96,0.97
1017,31,90,115,,1955-11-08 23:10:00,29.0,,50,1.78,3.2,...,2.62,3.11,2.6,3.02,3.29,3.44,7.96,0.96,0.96,0.97
1017,31,90,115,,1955-11-08 23:10:00,29.0,,100,1.78,3.2,...,2.62,3.11,2.6,3.02,3.29,3.44,7.96,0.96,0.96,0.97
