In [1]:
# DESCRIPTION:
# Prototyping descriptive statistic data frames for one file.
#
# Each column specified in MODRDR6.FMT is separated by commas.
# To access each column by list indexing, I will have to
# split each line read by readline into a list. This is pretty memory intensive
# over millions of lines, so perhaps a generator (yield) would be a better solution.
#
# Also consider the same thing for the lists themselves.

In [2]:
# Imports
import numpy as np
import pandas as pd
import os

In [None]:
# Dictionary generating function for weather columns
def create_weather_dict(series, descriptive_stat):
    """Build a labelled dictionary of weather fields from eight positional values.

    Values are read by *position* (0-7), never by index label, so a Series whose
    index still carries the original column numbers (for example the direct
    result of ``DataFrame.min()``) gives the same result as one whose index was
    reset to 0..7.

    Expected position order:
        0 wind speed, 1 wind direction, 2 ground temperature,
        3 air temperature (boom 1), 4 air temperature (boom 2),
        5 ambient air temperature, 6 local relative humidity,
        7 atmospheric pressure.

    :param series: <class 'pandas.core.series.Series'> (or a plain sequence such as a list)
        holding at least eight values in the order above.
    :param descriptive_stat: <class 'str'> prefix for every key, e.g. "Minimum", "Mean" or "Maximum".
    :return: <class 'dict'> weather_dict with the seven feature fields first and the
        output field (ambient air temperature) last.
    :raises IndexError: if series holds fewer than eight values.
    """
    # Use .iloc when available so integer keys always mean positions; plain
    # sequences (list, tuple, ndarray) are already positional.
    values = series.iloc if hasattr(series, "iloc") else series

    # (label, position) pairs. Insertion order is preserved in the result and
    # therefore fixes the column order of any DataFrame built from it.
    fields = (
        # Features
        ("Wind Speed (m/s)", 0),
        ("Wind Direction (degrees)", 1),
        ("Ground Temperature (K)", 2),
        ("Air Temperature (K) for Boom 1", 3),
        ("Air Temperature (K) for Boom 2", 4),
        ("Local Relative Humidity (%)", 6),
        ("Atmospheric Pressure (Pa)", 7),
        # Output
        ("Ambient Air Temperature (K)", 5),
    )

    weather_dict = {
        f"{descriptive_stat} {label}": values[position]
        for label, position in fields
    }

    return weather_dict


In [5]:
# Load data
# The raw file location is machine-specific. Set the RME_DATA_PATH environment
# variable to point at another copy of the file without editing this cell;
# when it is unset the original location below is used.
data_path = os.environ.get(
    "RME_DATA_PATH",
    r"C:\Dev\python\MachineLearning\BMI\data\raw_data\RME_398422988RMD00110000000_______P9.TAB",
)
# genfromtxt returns a 2-D float array here (default dtype); wrapping it in a
# DataFrame without column names leaves the columns labelled 0, 1, 2, ...
# NOTE(review): fields that cannot be parsed as numbers presumably load as NaN,
# which would explain the differing per-column counts -- confirm against the raw file.
df = pd.DataFrame(np.genfromtxt(data_path, delimiter=","))

In [27]:
# Display data -- significantly smaller HWS and WDIR sample size
# (compare the "count" row for columns 3 and 5 against the other columns)
weather_columns = [3, 5, 7, 11, 13, 15, 30, 37]
new_df = df.loc[:, weather_columns]
new_df.describe()

Unnamed: 0,3,5,7,11,13,15,30,37
count,25.0,33.0,10620.0,12360.0,12360.0,12360.0,12235.0,12360.0
mean,6.5316,148.743636,245.7674,242.062113,226.560402,234.811741,5.02398,724.064675
std,1.445924,101.248044,31.082786,25.835413,39.710816,20.251043,7.654241,28.601167
min,4.31,58.37,162.11,196.54,97.67,196.54,0.0,692.67
25%,5.38,83.49,211.54,217.67,220.15,217.6375,0.89,695.04
50%,6.19,103.44,258.415,256.05,247.16,245.76,1.61,714.355
75%,7.82,168.47,274.1725,265.36,251.81,251.81,4.27,755.4
max,9.56,338.49,281.86,273.26,262.84,273.26,31.41,779.53


In [34]:
# Estimator dfs
# Each reduction yields a Series indexed by the selected column labels;
# dropping that index renumbers the entries from 0 so they can be addressed
# by their order in new_df.
min_df, mean_df, max_df = (
    column_stats.reset_index(drop=True)
    for column_stats in (new_df.min(), new_df.mean(), new_df.max())
)

0      4.31
1     58.37
2    162.11
3    196.54
4     97.67
5    196.54
6      0.00
7    692.67
dtype: float64


In [35]:
# Build the minimum, mean, and maximum dictionaries from their estimator series
min_dict, mean_dict, max_dict = (
    create_weather_dict(estimates, label)
    for estimates, label in (
        (min_df, "Minimum"),
        (mean_df, "Mean"),
        (max_df, "Maximum"),
    )
)

In [21]:
# Combine the three dictionaries into one, in min -> mean -> max order.
# A later dictionary would overwrite an earlier one on a shared key, but the
# statistic prefixes passed when building them differ, so nothing collides.
final_dict = {}
for stat_dict in (min_dict, mean_dict, max_dict):
    final_dict.update(stat_dict)

In [22]:
# Make estimator dataframe
# final_dict maps each column name to a single scalar, so an explicit
# one-element index is supplied to produce exactly one row (labelled 0).
est_df = pd.DataFrame(data=final_dict, index=[0])
est_df

Unnamed: 0,Minimum Wind Speed (m/s),Minimum Wind Direction (degrees),Minimum Ground Temperature (K),Minimum Air Temperature (K) for Boom 1,Minimum Air Temperature (K) for Boom 2,Minimum Local Relative Humidity (%),Minimum Atmospheric Pressure (Pa),Minimum Ambient Air Temperature (K),Mean Wind Speed (m/s),Mean Wind Direction (degrees),...,Mean Atmospheric Pressure (Pa),Mean Ambient Air Temperature (K),Maximum Wind Speed (m/s),Maximum Wind Direction (degrees),Maximum Ground Temperature (K),Maximum Air Temperature (K) for Boom 1,Maximum Air Temperature (K) for Boom 2,Maximum Local Relative Humidity (%),Maximum Atmospheric Pressure (Pa),Maximum Ambient Air Temperature (K)
0,4.31,58.37,162.11,196.54,97.67,0.0,692.67,196.54,6.5316,148.743636,...,724.064675,234.811741,9.56,338.49,281.86,273.26,262.84,31.41,779.53,273.26
