In [1]:
import pandas as pd

# Load the solar flare records from the CSV file into `df`.
try:
    df = pd.read_csv("solar_flare_data.csv")
except FileNotFoundError:
    print("Error: Data file not found.")
    # Re-raise instead of calling exit(): in a notebook exit() shuts the kernel
    # down (losing all state), and in a script it ends the process with a
    # success status. Propagating the error stops execution and keeps the
    # traceback visible.
    raise

# Keep only the rows whose status marks the peak of a flare event.
flare_peaks = df[df['status'] == 'EVENT_PEAK'].copy()


# Cleaning the data
# Drop peak rows that have no flare class recorded.
flare_peaks = flare_peaks.dropna(subset=['flare_class'])
# The first character of the flare class string is the class letter.
flare_peaks['class_category'] = flare_peaks['flare_class'].str[0]

# df_cleaned is built from the full dataset (not only the peak rows): every
# row with a missing value in any column is dropped. The copy keeps the column
# assignments below from touching `df`.
df_cleaned = df.dropna().copy()

# Convert 'time' to datetime; unit='s' interprets the values as seconds.
try:
    df_cleaned['time'] = pd.to_datetime(df_cleaned['time'], unit='s')
except ValueError:
    print("Error converting 'time' column")
    # Re-raise instead of calling exit(): exit() would shut down the notebook
    # kernel (or end a script with a success status) and hide the real error.
    raise

# Map the class letter to an ordinal scale; letters that are not in the
# mapping become 0.
flare_class_mapping = {'A': 1, 'B': 2, 'C': 3, 'M': 4, 'X': 5}
df_cleaned['flare_class_numerical'] = (
    df_cleaned['flare_class']
    .str[0]
    .map(flare_class_mapping)
    .fillna(0)
    .astype(int)
)

# extract numeric part of flare class
def get_flare_magnitude(flare_str):
    """Return the numeric part of a flare class string as a float.

    Everything after the first character is parsed with ``float``. The
    result is 0.0 when the input is not a string, has fewer than two
    characters, or the remainder cannot be parsed as a number.
    """
    # Guard clause: nothing to parse without a string of at least two characters.
    if not isinstance(flare_str, str) or len(flare_str) <= 1:
        return 0.0

    numeric_part = flare_str[1:]
    try:
        magnitude = float(numeric_part)
    except ValueError:
        # Unparseable remainder falls back to zero.
        magnitude = 0.0
    return magnitude

# Store the numeric part of each flare class in its own column.
df_cleaned['flare_magnitude'] = df_cleaned['flare_class'].map(get_flare_magnitude)
# Combined score: class rank plus one tenth of the magnitude.
df_cleaned['flare_strength'] = df_cleaned['flare_class_numerical'].add(
    df_cleaned['flare_magnitude'].div(10.0)
)
df_cleaned.info()


<class 'pandas.core.frame.DataFrame'>
Index: 14374 entries, 1 to 53229
Data columns (total 10 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   xrsb_flux              14374 non-null  float64       
 1   status                 14374 non-null  object        
 2   time                   14374 non-null  datetime64[ns]
 3   background_flux        14374 non-null  float64       
 4   flare_class            14374 non-null  object        
 5   integrated_flux        14374 non-null  float64       
 6   flare_id               14374 non-null  int64         
 7   flare_class_numerical  14374 non-null  int64         
 8   flare_magnitude        14374 non-null  float64       
 9   flare_strength         14374 non-null  float64       
dtypes: datetime64[ns](1), float64(5), int64(2), object(2)
memory usage: 1.2+ MB


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=059000a2-e207-4dc7-9f7e-f909ccf665aa' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>