In [1]:
from datetime import datetime as dt
from datetime import date,timedelta
import pandas as pd
pd.options.mode.chained_assignment = None 
import numpy as np
import os, sys, csv

In [2]:
def check_files():
    source = '/data/hmi_jpgs_512/'
    with open('labels_store/totalfiles_jpg_512.csv','w',newline='',encoding='utf-8-sig') as f:
        w = csv.writer(f)
        for path, subdirs, files in os.walk(source):
            for name in files:
                w.writerow([os.path.join(path, name)])
# check_files()

In [3]:
def bi_daily_obs(df, pws, pwe, stop, hrs):
    #Datetime 
    df['start'] = pd.to_datetime(df['start_time'], format='%Y-%m-%d %H:%M:%S')

    #New Empty DF
    emp = pd.DataFrame()

    #List to store intermediate results
    lis = []
    cols=['label', 'goes_class', 'goes_c']

    #Loop to check max from midnight to midnight and noon to noon
    while True:
        #Date with max intensity of flare with in the 24 hour window
        emp = df[ (df.start >= pws) & (df.start <= pwe) ].sort_values('goes_class', ascending=False).head(1).squeeze(axis=0)
        if pd.Series(emp.goes_class).empty:
            ins = ''
        else:
            ins = emp.goes_class
        lis.append([ pws, ins, ins])
        pws = pws + pd.Timedelta(hours=hrs)
        pwe = pwe + pd.Timedelta(hours=hrs)
        if pwe >= stop:
            break

    df_result = pd.DataFrame(lis, columns=cols)
    print('Completed!')
    return df_result

def binarize(df, modes, hrs):
    cols=['label', 'goes_class', 'goes_c']
    #Empty space and nan values are filled with 0 in the goes_class column
    df.replace(np.nan, str(0), inplace=True)
    df.replace('', str(0), inplace=True)

    #Replacing X and M class flare with 1 and rest with 0 in goes_class column
    if(modes=='M'):
        for i in range(len(df)):
            if (df.goes_class[i][0] == 'X' or  df.goes_class[i][0] == 'M'):
                df.goes_class[i] = 1
            else:
                df.goes_class[i] = 0
    else:
        for i in range(len(df)):
            if (df.goes_class[i][0] == 'X' or  df.goes_class[i][0] == 'M' or df.goes_class[i][0] == 'C'):
                df.goes_class[i] = 1
            else:
                df.goes_class[i] = 0
#     df.to_csv(r'labels_store/{modes}_full_dataset_{hrs}_hours.csv'.format(modes=modes, hrs=hrs), index=False, header=True, columns=cols)
    return df

#This function is used to convert the timestamps to name of images that we use in this research
def date_to_filename(df):
    cols=['label']
    for items in cols:

        df[items] = pd.to_datetime(df[items], format='%Y-%m-%d %H:%M:%S')

        #Renaming label(Date) to this format of file HMI.m2010.05.21_12.00.00 
        df[items] = df[items].dt.year.astype(str) + '/' \
            + df[items].dt.month.map("{:02}".format).astype(str) + '/'\
            + df[items].dt.day.map("{:02}".format).astype(str) + '/'+ 'HMI.m'+ df[items].dt.year.astype(str) + '.' \
            + df[items].dt.month.map("{:02}".format).astype(str) + '.'\
            + df[items].dt.day.map("{:02}".format).astype(str) + '_' \
            + df[items].dt.hour.map("{:02}".format).astype(str) + '.'\
            + df[items].dt.minute.map("{:02}".format).astype(str) + '.'\
            + df[items].dt.second.map("{:02}".format).astype(str) + '.jpg'
    
    return df

def filter_files(df, mode, hrs):
    data_df = df.copy()
    data_df.goes_class = data_df.goes_class.astype(str)
    data_df_final = data_df.copy()
    file = pd.read_csv('labels_store/totalfiles_jpg_512.csv', names=['filenames'])
    file_df = pd.DataFrame(file)
    print(file_df)
#     file_df['filenames'] = file_df['filenames'].map(lambda x: x.lstrip('/data/hmi_jpgs_512/'))
    file_df['filenames'] = file_df['filenames'].str.replace("/data/hmi_jpgs_512/","")
    print(file_df)
    data_df['label_mis'] = data_df.label.isin(file_df.filenames)
    data_df = data_df.applymap(lambda x: x if x else np.nan)
    data_df = data_df.dropna()
#     data_df.goes_class = data_df.goes_class.astype(int)
    cols=['label', 'goes_class', 'goes_c']
#     data_df.to_csv(r'labels_store/{mode}_full_dataset_cleaned_{hrs}_hours.csv'.format(mode=mode, hrs=hrs), index=False, header=True, columns=cols)
    return data_df

#Creating time-segmented 4-Fold CV Dataset, where 9 months of data is used for training and rest 3 for validation
def create_CVDataset(df, mode, hrs):
    cols=['label', 'goes_class', 'goes_c']
    search_list = [['01', '02', '03'], ['04', '05', '06'], ['07', '08', '09'], ['10', '11', '12']]
    for i in range(4):
        search_for = search_list[i]
        mask = df['label'].apply(lambda row: row[21:23]).str.contains('|'.join(search_for))
#         train = df[~mask]
        val = df[mask]
#         print(train['goes_class'].value_counts())
        print(val['goes_class'].value_counts())
        # Dumping the dataframe into CSV with label as Date and goes_class as intensity
#         train.to_csv(r'labels_store/{hrs}_{mode}_Fold{i}_train.csv'.format(mode=mode, i=i+1, hrs=hrs), index=False, header=True, columns=cols)
        val.to_csv(r'labels_store/{hrs}_{mode}_Partition{i}_.csv'.format(mode=mode, i=i+1, hrs=hrs), index=False, header=True, columns=cols)

In [4]:
#Load Original source for Goes Flare X-ray Flux 
data = pd.read_csv (r'label_source/goes_flares_integrated.csv')   

#Convert to DataFrame
dataframe = pd.DataFrame(data, columns= ['start_time','goes_class'])

#Prediction window Start
pws = pd.to_datetime('2010-12-01 00:00:00',format='%Y-%m-%d %H:%M:%S')

#Prediction Window Stop
pwe = pd.to_datetime('2010-12-01 05:59:59',format='%Y-%m-%d %H:%M:%S')

#Data available till 2018-12-30
stop = pd.to_datetime('2018-12-31 23:59:59',format='%Y-%m-%d %H:%M:%S')

#modes='M' is used to create labels for greater than or equal to M1.0 class flares
#modes= 'C' is used to create labels for greater than or equal to C1.0 class flares
mode = 'M'

#Calling functions in order
df_res = bi_daily_obs(dataframe, pws, pwe, stop, 1)
df_res2 = date_to_filename(df_res)
df_res3 = binarize(df_res2, mode, 1)
df_res4 = filter_files(df_res2, mode, 1)

create_CVDataset(df_res4, mode, 6)

Completed!
                                               filenames
0      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
1      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
2      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
3      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
4      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
...                                                  ...
63724  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...
63725  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...
63726  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...
63727  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...
63728  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...

[63729 rows x 1 columns]
                                     filenames
0      2010/12/21/HMI.m2010.12.21_21.00.00.jpg
1      2010/12/21/HMI.m2010.12.21_09.00.00.jpg
2      2010/12/21/HMI.m2010.12.21_20.00.00.jpg
3      2010/12/21/HMI.m2010.12.21_08.00.00.jpg
4      2010/12/21/HMI.m2010.12.21_23.00.00.j

In [5]:
#Load Original source for Goes Flare X-ray Flux 
data = pd.read_csv (r'label_source/goes_flares_integrated.csv')   

#Convert to DataFrame
dataframe = pd.DataFrame(data, columns= ['start_time','goes_class'])

#Prediction window Start
pws = pd.to_datetime('2010-12-01 00:00:00',format='%Y-%m-%d %H:%M:%S')

#Prediction Window Stop
pwe = pd.to_datetime('2010-12-01 11:59:59',format='%Y-%m-%d %H:%M:%S')

#Data available till 2018-12-30
stop = pd.to_datetime('2018-12-31 23:59:59',format='%Y-%m-%d %H:%M:%S')

#modes='M' is used to create labels for greater than or equal to M1.0 class flares
#modes= 'C' is used to create labels for greater than or equal to C1.0 class flares
mode = 'M'

#Calling functions in order
df_res = bi_daily_obs(dataframe, pws, pwe, stop, 1)
df_res2 = date_to_filename(df_res)
df_res3 = binarize(df_res2, mode, 1)
df_res4 = filter_files(df_res2, mode, 1)

create_CVDataset(df_res4, mode, 12)

Completed!
                                               filenames
0      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
1      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
2      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
3      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
4      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
...                                                  ...
63724  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...
63725  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...
63726  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...
63727  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...
63728  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...

[63729 rows x 1 columns]
                                     filenames
0      2010/12/21/HMI.m2010.12.21_21.00.00.jpg
1      2010/12/21/HMI.m2010.12.21_09.00.00.jpg
2      2010/12/21/HMI.m2010.12.21_20.00.00.jpg
3      2010/12/21/HMI.m2010.12.21_08.00.00.jpg
4      2010/12/21/HMI.m2010.12.21_23.00.00.j

In [6]:
#Load Original source for Goes Flare X-ray Flux 
data = pd.read_csv (r'label_source/goes_flares_integrated.csv')   

#Convert to DataFrame
dataframe = pd.DataFrame(data, columns= ['start_time','goes_class'])

#Prediction window Start
pws = pd.to_datetime('2010-12-01 00:00:00',format='%Y-%m-%d %H:%M:%S')

#Prediction Window Stop
pwe = pd.to_datetime('2010-12-01 17:59:59',format='%Y-%m-%d %H:%M:%S')

#Data available till 2018-12-30
stop = pd.to_datetime('2018-12-31 23:59:59',format='%Y-%m-%d %H:%M:%S')

#modes='M' is used to create labels for greater than or equal to M1.0 class flares
#modes= 'C' is used to create labels for greater than or equal to C1.0 class flares
mode = 'M'

#Calling functions in order
df_res = bi_daily_obs(dataframe, pws, pwe, stop, 1)
df_res2 = date_to_filename(df_res)
df_res3 = binarize(df_res2, mode, 1)
df_res4 = filter_files(df_res2, mode, 1)

create_CVDataset(df_res4, mode, 18)

Completed!
                                               filenames
0      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
1      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
2      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
3      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
4      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
...                                                  ...
63724  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...
63725  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...
63726  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...
63727  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...
63728  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...

[63729 rows x 1 columns]
                                     filenames
0      2010/12/21/HMI.m2010.12.21_21.00.00.jpg
1      2010/12/21/HMI.m2010.12.21_09.00.00.jpg
2      2010/12/21/HMI.m2010.12.21_20.00.00.jpg
3      2010/12/21/HMI.m2010.12.21_08.00.00.jpg
4      2010/12/21/HMI.m2010.12.21_23.00.00.j

In [7]:
#Load Original source for Goes Flare X-ray Flux 
data = pd.read_csv (r'label_source/goes_flares_integrated.csv')   

#Convert to DataFrame
dataframe = pd.DataFrame(data, columns= ['start_time','goes_class'])

#Prediction window Start
pws = pd.to_datetime('2010-12-01 00:00:00',format='%Y-%m-%d %H:%M:%S')

#Prediction Window Stop
pwe = pd.to_datetime('2010-12-01 23:59:59',format='%Y-%m-%d %H:%M:%S')

#Data available till 2018-12-30
stop = pd.to_datetime('2018-12-31 23:59:59',format='%Y-%m-%d %H:%M:%S')

#modes='M' is used to create labels for greater than or equal to M1.0 class flares
#modes= 'C' is used to create labels for greater than or equal to C1.0 class flares
mode = 'M'

#Calling functions in order
df_res = bi_daily_obs(dataframe, pws, pwe, stop, 1)
df_res2 = date_to_filename(df_res)
df_res3 = binarize(df_res2, mode, 1)
df_res4 = filter_files(df_res2, mode, 1)

create_CVDataset(df_res4, mode, 24)

Completed!
                                               filenames
0      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
1      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
2      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
3      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
4      /data/hmi_jpgs_512/2010/12/21/HMI.m2010.12.21_...
...                                                  ...
63724  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...
63725  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...
63726  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...
63727  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...
63728  /data/hmi_jpgs_512/2018/07/11/HMI.m2018.07.11_...

[63729 rows x 1 columns]
                                     filenames
0      2010/12/21/HMI.m2010.12.21_21.00.00.jpg
1      2010/12/21/HMI.m2010.12.21_09.00.00.jpg
2      2010/12/21/HMI.m2010.12.21_20.00.00.jpg
3      2010/12/21/HMI.m2010.12.21_08.00.00.jpg
4      2010/12/21/HMI.m2010.12.21_23.00.00.j