## Brainstorming
#### Analysis Questions 


#### Data Quality Assessment
- **First**:
    - Renaming all the columns to a shorter, simplified name
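    - Collapsing repeated spaces inside the column names into single spaces so the spacing is uniform, e.g. `FAWN_raw.columns = FAWN_raw.columns.str.replace("  ", " ")` (note: `.str.strip()` only removes leading/trailing whitespace, not the spaces between letters, numbers, or symbols)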


- Check data types using `.info()`
    - displays: Column names, types, non-null counts, memory
- Check statistical description using `.describe()`
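- Use `DataFrame.sample()` with a fixed `random_state` to randomly extract rows/observations from the dataset (originally planned as 7500 rows, but the loaded data has only 6572 rows, so the sample size must be reduced or drawn with replacement)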
- Use `.value_counts()` for frequency count of each category
- Convert all NaN to random number values based on the mean of each column

In [43]:
import numpy as np
import pandas as pd

# Load the raw FAWN weather report from a CSV file.
FAWN_raw = pd.read_csv("data/FAWN_report.csv")

# Snapshot of the header names exactly as they appear in the file, kept so the
# original spacing can be compared against the cleaned names.
FAWN_raw_spaces = FAWN_raw.columns.tolist()

# Collapse every run of whitespace in the header names to a single space
# *before* renaming, so a single mapping can match all columns. `regex=True`
# is passed explicitly because the default of `str.replace` differs between
# pandas versions.
FAWN_raw.columns = FAWN_raw.columns.str.replace(r"\s+", " ", regex=True)

# Whitespace-normalised header -> shorter, simplified column name.
COLUMN_RENAMES = {
    "2m T avg (F)": "Temp_avg (F)",
    "2m T min (F)": "T_min (F)",
    "2m T max (F)": "T_max (F)",
    "2m DewPt avg (F)": "DewPt_avg (F)",
    "RelHum avg 2m (pct)": "RelHum_avg (pct)",
    "2m Rain tot (in)": "Rain_tot (in)",
    "2m Rain max over 15min (in)": "Rain_max over 15min (in)",
    "SolRad avg2m (w/m^2)": "SolRad_avg @ 2m (w/m^2)",
    "10m Wind avg (mph)": "Wind_avg @ 10m (mph)",
    "10m Wind min (mph)": "Wind_min @ 10m (mph)",
    "10m Wind max (mph)": "Wind_max @ 10m (mph)",
    "WDir avg10m (deg)": "Wind_Dir_avg @ 10m (deg)",
    "BP avg (mb)": "Barametric_Pre_avg (mb)",
    "N (# obs)": "N (#obs)",
    "2m WetBulb (F)": "WetBulb @ 2m (F)",
}

# `rename` silently skips keys it cannot find, so verify that every expected
# header is present instead of ending up with half-renamed columns.
unmatched_headers = sorted(set(COLUMN_RENAMES) - set(FAWN_raw.columns))
assert not unmatched_headers, f"Headers missing from CSV: {unmatched_headers}"

# Non-mutating rename: re-running the cell always starts from the fresh load.
FAWN_raw = FAWN_raw.rename(columns=COLUMN_RENAMES)

# Show every column of the wide frame on one line instead of wrapping/eliding.
pd.set_option("display.width", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.expand_frame_repr", False)

# `DataFrame.info()` writes its report to stdout and returns None, so it is
# called directly; interpolating it into an f-string would print a stray "None".
FAWN_raw.info()

print(f"Shape/dimensions of DataFrame: {FAWN_raw.shape}\n")
print(f"Number of Elements: {FAWN_raw.size}\n")

# Last expression of the cell: rich display of the first rows.
FAWN_raw.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6572 entries, 0 to 6571
Data columns (total 17 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   FAWN Station              6572 non-null   object 
 1   Period                    6572 non-null   object 
 2   Temp_avg (F)              6572 non-null   float64
 3   T_min (F)                 6572 non-null   float64
 4   T_max (F)                 6572 non-null   float64
 5   DewPt_avg (F)             6572 non-null   float64
 6   RelHum_avg (pct)          6572 non-null   int64  
 7   Rain_tot (in)             6572 non-null   float64
 8   Rain_max over 15min (in)  6572 non-null   float64
 9   SolRad_avg @ 2m (w/m^2)   6572 non-null   float64
 10  Wind_avg @ 10m (mph)      6572 non-null   float64
 11  Wind_min @ 10m (mph)      6572 non-null   float64
 12  Wind_max @ 10m (mph)      6572 non-null   float64
 13  Wind_Dir_avg @ 10m (deg)  6572 non-null   int64  
 14  Barametr

Unnamed: 0,FAWN Station,Period,Temp_avg (F),T_min (F),T_max (F),DewPt_avg (F),RelHum_avg (pct),Rain_tot (in),Rain_max over 15min (in),SolRad_avg @ 2m (w/m^2),Wind_avg @ 10m (mph),Wind_min @ 10m (mph),Wind_max @ 10m (mph),Wind_Dir_avg @ 10m (deg),Barametric_Pre_avg (mb),N (#obs),WetBulb @ 2m (F)
0,Apopka,1-Jan-22,73.4,66.4,82.4,65.34,78,0.0,0.0,156.5,4.29,0.0,17.83,196,1014,96,67.97
1,Apopka,2-Jan-22,73.41,67.06,81.93,68.16,84,0.0,0.0,124.04,5.42,0.13,18.36,221,1013,96,69.8
2,Apopka,3-Jan-22,61.63,47.97,71.69,50.1,68,0.03,0.03,178.06,9.36,0.43,28.5,309,1016,96,55.06
3,Apopka,4-Jan-22,58.21,43.94,72.05,51.42,80,0.0,0.0,144.97,5.12,0.05,16.96,39,1019,96,54.32
4,Apopka,5-Jan-22,62.97,54.84,74.03,58.64,87,0.0,0.0,116.53,4.12,0.02,14.23,353,1014,96,60.27
