In [1]:
import numpy as np
import pandas as pd
import random
from datetime import datetime, timedelta

# GOES Flare Profiles for SolarNN

## Flare Profiles - Case 1
- Flare profiles determined by GOES detections and random intervals between GOES detections.

In [4]:
# Step 1: Read the CSV file
flares2017_2022 = pd.read_csv("/Users/joshuaingram/Main/Projects/SolarFlareNN/data/FlaresGoes161718Wavelet_2.csv")

# Step 2: Identify flare and wavelet indices
flare_indices = flares2017_2022.index[flares2017_2022['Flare'] == 1].tolist()
wavelet_indices = flares2017_2022.index[flares2017_2022['Wavelet'] == 1].tolist()

numFlares = len(flare_indices)

# Step 3: Initialize parameters and matrices
hist1 = 15  # mins before
hist2 = 15  # mins after
nonflareMultiple = 20  # how many non-flares to generate for every flare
cols = hist1 + hist2 + 1 + 1 + 1

# Generate flare profiles
FlareProfiles = np.empty((numFlares, cols))
FlareProfiles[:] = np.nan

j = 0
for i in flare_indices:
    if i > hist1 and i <= len(flares2017_2022) - hist2:
        fluxes = flares2017_2022['Flux'].iloc[(i - hist1):(i + hist2 + 1)] / flares2017_2022['Flux'].iloc[i]
        FlareProfiles[j, :-2] = fluxes.values
        FlareProfiles[j, -2] = 1
        FlareProfiles[j, -1] = flares2017_2022['secs_since_2000'].iloc[i]
        j += 1

# Step 4: Generate non-flare profiles
exclude_indices = set(flare_indices)
for k in range(1, 6):
    exclude_indices.update([idx + k for idx in flare_indices])
    exclude_indices.update([idx - k for idx in flare_indices])

nonflare_indices = set(range(hist1 + 1, len(flares2017_2022) - (hist2 + 1))) - exclude_indices
nonflare_indices = random.sample(nonflare_indices, nonflareMultiple * numFlares)

NotFlareProfiles = np.empty((nonflareMultiple * numFlares, cols))
NotFlareProfiles[:] = np.nan

j = 0
for i in nonflare_indices:
    fluxes = flares2017_2022['Flux'].iloc[(i - hist1):(i + hist2 + 1)] / flares2017_2022['Flux'].iloc[i]
    NotFlareProfiles[j, :-2] = fluxes.values
    NotFlareProfiles[j, -2] = 0
    NotFlareProfiles[j, -1] = flares2017_2022['secs_since_2000'].iloc[i]
    j += 1

# Step 5: Combine and save the profiles
Profiles = np.vstack([FlareProfiles, NotFlareProfiles])
Profiles_df = pd.DataFrame(Profiles, columns=[f'x{i+1}' for i in range(hist1 + hist2 + 1)] + ['y', 'SecsSince2000'])
Profiles_df = Profiles_df.dropna()

# Convert 'SecsSince2000' to datetime
Profiles_df['Date'] = pd.to_datetime(Profiles_df['SecsSince2000'], unit='s', origin=pd.Timestamp('2000-01-01 12:00:00'))
Profiles_df.drop('SecsSince2000', axis=1, inplace=True)

# Save to CSV
Profiles_df.to_csv("/Users/joshuaingram/Main/Projects/SolarFlareNN/Profiles2017_2022_case1.csv", index=False)

Profiles_df.head(), Profiles_df.shape

  flares2017_2022 = pd.read_csv("/Users/joshuaingram/Main/Projects/SolarFlareNN/data/FlaresGoes161718Wavelet_2.csv")
since Python 3.9 and will be removed in a subsequent version.
  nonflare_indices = random.sample(nonflare_indices, nonflareMultiple * numFlares)


(         x1        x2        x3        x4        x5        x6        x7  \
 0  0.249173  0.278074  0.289489  0.297877  0.323917  0.323846  0.293258   
 1  0.185302  0.195949  0.204914  0.203649  0.217488  0.225099  0.233949   
 2  0.071275  0.084802  0.110699  0.211815  0.258221  0.193603  0.142327   
 3  0.722010  0.677981  0.669214  0.650412  0.641111  0.633326  0.625018   
 4  0.729120  0.716180  0.701948  0.645696  0.609369  0.585797  0.576336   
 
          x8        x9       x10  ...       x24       x25       x26       x27  \
 0  0.300184  0.320395  0.401313  ...  0.600410  0.576188  0.541754  0.507117   
 1  0.248383  0.258860  0.267442  ...  0.877201  1.928971  3.298110  3.872649   
 2  0.106506  0.087165  0.077384  ...  0.351790  0.349945  0.343246  0.341172   
 3  0.647180  0.679063  0.713814  ...  0.766183  0.723078  0.695107  0.683881   
 4  0.560038  0.550306  0.534466  ...  1.558642  2.091865  2.234191  2.047342   
 
         x28       x29       x30       x31    y       

## Flare Profiles - Case 2
- Flare profiles determined by GOES detections and random intervals between wavelet detections.

In [11]:
# Step 1: Read the CSV file
flares2017_2022 = pd.read_csv("/Users/joshuaingram/Main/Projects/SolarFlareNN/data/FlaresGoes161718Wavelet_2.csv")

# Step 2: Identify flare and wavelet indices
flare_indices = flares2017_2022.index[flares2017_2022['Flare'] == 1].tolist()
wavelet_indices = flares2017_2022.index[flares2017_2022['Wavelet'] == 1].tolist()

numFlares = len(flare_indices)

# Step 3: Initialize parameters and matrices
hist1 = 15  # mins before
hist2 = 15  # mins after
nonflareMultiple = 20  # how many non-flares to generate for every flare
cols = hist1 + hist2 + 1 + 1 + 1

FlareProfiles = np.empty((numFlares, cols))
FlareProfiles[:] = np.nan

j = 0
for i in flare_indices:
    if i > hist1 and i <= len(flares2017_2022) - hist2:
        fluxes = flares2017_2022['Flux'].iloc[(i - hist1):(i + hist2 + 1)] / flares2017_2022['Flux'].iloc[i]
        FlareProfiles[j, :-2] = fluxes.values
        FlareProfiles[j, -2] = 1
        FlareProfiles[j, -1] = flares2017_2022['secs_since_2000'].iloc[i]
        j += 1

# Step 4: Generate non-flare profiles
exclude_indices = set(wavelet_indices)
for k in range(1, 6):
    exclude_indices.update([idx + k for idx in wavelet_indices])
    exclude_indices.update([idx - k for idx in wavelet_indices])

nonflare_indices = set(range(hist1 + 1, len(flares2017_2022) - (hist2 + 1))) - exclude_indices
nonflare_indices = random.sample(nonflare_indices, nonflareMultiple * numFlares)

NotFlareProfiles = np.empty((nonflareMultiple * numFlares, cols))
NotFlareProfiles[:] = np.nan

j = 0
for i in nonflare_indices:
    fluxes = flares2017_2022['Flux'].iloc[(i - hist1):(i + hist2 + 1)] / flares2017_2022['Flux'].iloc[i]
    NotFlareProfiles[j, :-2] = fluxes.values
    NotFlareProfiles[j, -2] = 0
    NotFlareProfiles[j, -1] = flares2017_2022['secs_since_2000'].iloc[i]
    j += 1

# Step 5: Combine and save the profiles
Profiles = np.vstack([FlareProfiles, NotFlareProfiles])
Profiles_df = pd.DataFrame(Profiles, columns=[f'x{i+1}' for i in range(hist1 + hist2 + 1)] + ['y', 'SecsSince2000'])
Profiles_df = Profiles_df.dropna()

# Convert 'SecsSince2000' to datetime
Profiles_df['Date'] = pd.to_datetime(Profiles_df['SecsSince2000'], unit='s', origin=pd.Timestamp('2000-01-01 12:00:00'))
Profiles_df.drop('SecsSince2000', axis=1, inplace=True)

# Save to CSV
Profiles_df.to_csv("/Users/joshuaingram/Main/Projects/SolarFlareNN/Profiles2017_2022_case2.csv", index=False)

Profiles_df.head(), Profiles_df.shape

  flares2017_2022 = pd.read_csv("/Users/joshuaingram/Main/Projects/SolarFlareNN/data/FlaresGoes161718Wavelet_2.csv")
since Python 3.9 and will be removed in a subsequent version.
  nonflare_indices = random.sample(nonflare_indices, nonflareMultiple * numFlares)


(         x1        x2        x3        x4        x5        x6        x7  \
 0  0.249173  0.278074  0.289489  0.297877  0.323917  0.323846  0.293258   
 1  0.185302  0.195949  0.204914  0.203649  0.217488  0.225099  0.233949   
 2  0.071275  0.084802  0.110699  0.211815  0.258221  0.193603  0.142327   
 3  0.722010  0.677981  0.669214  0.650412  0.641111  0.633326  0.625018   
 4  0.729120  0.716180  0.701948  0.645696  0.609369  0.585797  0.576336   
 
          x8        x9       x10  ...       x24       x25       x26       x27  \
 0  0.300184  0.320395  0.401313  ...  0.600410  0.576188  0.541754  0.507117   
 1  0.248383  0.258860  0.267442  ...  0.877201  1.928971  3.298110  3.872649   
 2  0.106506  0.087165  0.077384  ...  0.351790  0.349945  0.343246  0.341172   
 3  0.647180  0.679063  0.713814  ...  0.766183  0.723078  0.695107  0.683881   
 4  0.560038  0.550306  0.534466  ...  1.558642  2.091865  2.234191  2.047342   
 
         x28       x29       x30       x31    y       

## Flare Profiles - Case 3
- Flare profiles for positive cases determined by GOES detections, and slide center point of window over +/- minutes from peak of flare. Negative cases determined by wavelet detection

In [10]:
# Step 1: Read the CSV file
flares2017_2022 = pd.read_csv("/Users/joshuaingram/Main/Projects/SolarFlareNN/data/FlaresGoes161718Wavelet_2.csv")

# Step 2: Identify flare and wavelet indices
flare_indices = flares2017_2022.index[flares2017_2022['Flare'] == 1].tolist()
wavelet_indices = flares2017_2022.index[flares2017_2022['Wavelet'] == 1].tolist()

# label fluxes +/- 3 mins from peak as 1
new_flare_indices = []
for i in flare_indices:
        new_flare_indices.extend([i-3, i-2, i-1, i, i+1, i+2, i+3]) # indices of +/- 3 mins from peak + peak itself

flare_indices = new_flare_indices

numFlares = len(flare_indices)

# Step 3: Initialize parameters and matrices
hist1 = 15  # mins before
hist2 = 15  # mins after
nonflareMultiple = 5  # how many non-flares to generate for every flare
cols = hist1 + hist2 + 1 + 1 + 1

FlareProfiles = np.empty((numFlares, cols))
FlareProfiles[:] = np.nan

j = 0
for i in flare_indices:
    if i > hist1 and i <= len(flares2017_2022) - hist2:
        fluxes = flares2017_2022['Flux'].iloc[(i - hist1):(i + hist2 + 1)] / flares2017_2022['Flux'].iloc[i]
        FlareProfiles[j, :-2] = fluxes.values
        FlareProfiles[j, -2] = 1
        FlareProfiles[j, -1] = flares2017_2022['secs_since_2000'].iloc[i]
        j += 1

# Step 4: Generate non-flare profiles
exclude_indices = set(wavelet_indices)
for k in range(1, 6):
    exclude_indices.update([idx + k for idx in wavelet_indices])
    exclude_indices.update([idx - k for idx in wavelet_indices])

nonflare_indices = set(range(hist1 + 1, len(flares2017_2022) - (hist2 + 1))) - exclude_indices
nonflare_indices = random.sample(nonflare_indices, nonflareMultiple * numFlares)

NotFlareProfiles = np.empty((nonflareMultiple * numFlares, cols))
NotFlareProfiles[:] = np.nan

j = 0
for i in nonflare_indices:
    fluxes = flares2017_2022['Flux'].iloc[(i - hist1):(i + hist2 + 1)] / flares2017_2022['Flux'].iloc[i]
    NotFlareProfiles[j, :-2] = fluxes.values
    NotFlareProfiles[j, -2] = 0
    NotFlareProfiles[j, -1] = flares2017_2022['secs_since_2000'].iloc[i]
    j += 1

# Step 5: Combine and save the profiles
Profiles = np.vstack([FlareProfiles, NotFlareProfiles])
Profiles_df = pd.DataFrame(Profiles, columns=[f'x{i+1}' for i in range(hist1 + hist2 + 1)] + ['y', 'SecsSince2000'])
Profiles_df = Profiles_df.dropna()

# Convert 'SecsSince2000' to datetime
Profiles_df['Date'] = pd.to_datetime(Profiles_df['SecsSince2000'], unit='s', origin=pd.Timestamp('2000-01-01 12:00:00'))
Profiles_df.drop('SecsSince2000', axis=1, inplace=True)

# Save to CSV
Profiles_df.to_csv("/Users/joshuaingram/Main/Projects/SolarFlareNN/Profiles2017_2022_case3.csv", index=False)

Profiles_df.head(), Profiles_df.shape

  flares2017_2022 = pd.read_csv("/Users/joshuaingram/Main/Projects/SolarFlareNN/data/FlaresGoes161718Wavelet_2.csv")
since Python 3.9 and will be removed in a subsequent version.
  nonflare_indices = random.sample(nonflare_indices, nonflareMultiple * numFlares)


(         x1        x2        x3        x4        x5        x6        x7  \
 0  0.428887  0.404045  0.357289  0.337403  0.376537  0.391995  0.403353   
 1  0.339866  0.300537  0.283810  0.316727  0.329730  0.339284  0.368943   
 2  0.264036  0.249340  0.278260  0.289683  0.298077  0.324133  0.324063   
 3  0.249173  0.278074  0.289489  0.297877  0.323917  0.323846  0.293258   
 4  0.302252  0.314660  0.323778  0.352081  0.352004  0.318757  0.326285   
 
          x8        x9       x10  ...       x24       x25       x26       x27  \
 0  0.438612  0.438517  0.397098  ...  0.884744  0.846193  0.828326  0.813009   
 1  0.368862  0.334022  0.341911  ...  0.711782  0.696753  0.683870  0.656281   
 2  0.293454  0.300385  0.320609  ...  0.612130  0.600812  0.576574  0.542117   
 3  0.300184  0.320395  0.401313  ...  0.600410  0.576188  0.541754  0.507117   
 4  0.348253  0.436207  0.548378  ...  0.626288  0.588860  0.551211  0.526962   
 
         x28       x29       x30       x31    y       

## Flare Profiles - Case 4
- Flare profiles determined by GOES detections and random intervals between wavelet detections. Do not normalize flux values.

In [None]:
# Step 1: Read the CSV file
flares2017_2022 = pd.read_csv("/Users/joshuaingram/Main/Projects/SolarFlareNN/data/FlaresGoes161718Wavelet_2.csv")

# Step 2: Identify flare and wavelet indices
flare_indices = flares2017_2022.index[flares2017_2022['Flare'] == 1].tolist()
wavelet_indices = flares2017_2022.index[flares2017_2022['Wavelet'] == 1].tolist()

numFlares = len(flare_indices)

# Step 3: Initialize parameters and matrices
hist1 = 15  # mins before
hist2 = 15  # mins after
nonflareMultiple = 20  # how many non-flares to generate for every flare
cols = hist1 + hist2 + 1 + 1 + 1

FlareProfiles = np.empty((numFlares, cols))
FlareProfiles[:] = np.nan

j = 0
for i in flare_indices:
    if i > hist1 and i <= len(flares2017_2022) - hist2:
        fluxes = flares2017_2022['Flux'].iloc[(i - hist1):(i + hist2 + 1)]
        FlareProfiles[j, :-2] = fluxes.values
        FlareProfiles[j, -2] = 1
        FlareProfiles[j, -1] = flares2017_2022['secs_since_2000'].iloc[i]
        j += 1

# Step 4: Generate non-flare profiles
exclude_indices = set(wavelet_indices)
for k in range(1, 6):
    exclude_indices.update([idx + k for idx in wavelet_indices])
    exclude_indices.update([idx - k for idx in wavelet_indices])

nonflare_indices = set(range(hist1 + 1, len(flares2017_2022) - (hist2 + 1))) - exclude_indices
nonflare_indices = random.sample(nonflare_indices, nonflareMultiple * numFlares)

NotFlareProfiles = np.empty((nonflareMultiple * numFlares, cols))
NotFlareProfiles[:] = np.nan

j = 0
for i in nonflare_indices:
    fluxes = flares2017_2022['Flux'].iloc[(i - hist1):(i + hist2 + 1)]
    NotFlareProfiles[j, :-2] = fluxes.values
    NotFlareProfiles[j, -2] = 0
    NotFlareProfiles[j, -1] = flares2017_2022['secs_since_2000'].iloc[i]
    j += 1

# Step 5: Combine and save the profiles
Profiles = np.vstack([FlareProfiles, NotFlareProfiles])
Profiles_df = pd.DataFrame(Profiles, columns=[f'x{i+1}' for i in range(hist1 + hist2 + 1)] + ['y', 'SecsSince2000'])
Profiles_df = Profiles_df.dropna()

# Convert 'SecsSince2000' to datetime
Profiles_df['Date'] = pd.to_datetime(Profiles_df['SecsSince2000'], unit='s', origin=pd.Timestamp('2000-01-01 12:00:00'))
Profiles_df.drop('SecsSince2000', axis=1, inplace=True)

# Save to CSV
Profiles_df.to_csv("/Users/joshuaingram/Main/Projects/SolarFlareNN/Profiles2017_2022_case4.csv", index=False)

Profiles_df.head(), Profiles_df.shape

## Flare Profiles - Case 5
- Flare profiles determined by GOES detections and random intervals between wavelet detections. Change time scales of flare profiles.

In [12]:
# Step 1: Read the CSV file
flares2017_2022 = pd.read_csv("/Users/joshuaingram/Main/Projects/SolarFlareNN/data/FlaresGoes161718Wavelet_2.csv")

# Step 2: Identify flare and wavelet indices
flare_indices = flares2017_2022.index[flares2017_2022['Flare'] == 1].tolist()
wavelet_indices = flares2017_2022.index[flares2017_2022['Wavelet'] == 1].tolist()

numFlares = len(flare_indices)

# Step 3: Initialize parameters and matrices
hist1 = 15  # mins before
hist2 = 15  # mins after
nonflareMultiple = 20  # how many non-flares to generate for every flare
cols = hist1 + hist2 + 1 + 1 + 1

FlareProfiles = np.empty((numFlares, cols))
FlareProfiles[:] = np.nan

j = 0
for i in flare_indices:
    if i > hist1 and i <= len(flares2017_2022) - hist2:
        fluxes = flares2017_2022['Flux'].iloc[(i - hist1):(i + hist2 + 1)] / flares2017_2022['Flux'].iloc[i]
        FlareProfiles[j, :-2] = fluxes.values
        FlareProfiles[j, -2] = 1
        FlareProfiles[j, -1] = flares2017_2022['secs_since_2000'].iloc[i]
        j += 1

# Step 4: Generate non-flare profiles
exclude_indices = set(wavelet_indices)
for k in range(1, 6):
    exclude_indices.update([idx + k for idx in wavelet_indices])
    exclude_indices.update([idx - k for idx in wavelet_indices])

nonflare_indices = set(range(hist1 + 1, len(flares2017_2022) - (hist2 + 1))) - exclude_indices
nonflare_indices = random.sample(nonflare_indices, nonflareMultiple * numFlares)

NotFlareProfiles = np.empty((nonflareMultiple * numFlares, cols))
NotFlareProfiles[:] = np.nan

j = 0
for i in nonflare_indices:
    fluxes = flares2017_2022['Flux'].iloc[(i - hist1):(i + hist2 + 1)] / flares2017_2022['Flux'].iloc[i]
    NotFlareProfiles[j, :-2] = fluxes.values
    NotFlareProfiles[j, -2] = 0
    NotFlareProfiles[j, -1] = flares2017_2022['secs_since_2000'].iloc[i]
    j += 1

# Step 5: Combine and save the profiles
Profiles = np.vstack([FlareProfiles, NotFlareProfiles])
Profiles_df = pd.DataFrame(Profiles, columns=[f'x{i+1}' for i in range(hist1 + hist2 + 1)] + ['y', 'SecsSince2000'])
Profiles_df = Profiles_df.dropna()

# Convert 'SecsSince2000' to datetime
Profiles_df['Date'] = pd.to_datetime(Profiles_df['SecsSince2000'], unit='s', origin=pd.Timestamp('2000-01-01 12:00:00'))
Profiles_df.drop('SecsSince2000', axis=1, inplace=True)

# Save to CSV
Profiles_df.to_csv("/Users/joshuaingram/Main/Projects/SolarFlareNN/Profiles2017_2022_case2.csv", index=False)

Profiles_df.head(), Profiles_df.shape

ModuleNotFoundError: No module named 'tensorflow'

## Flare Profiles - Case 5 
- Flare profiles determined by GOES detections, random intervals between wavelet detections, and random noise at sub A-class flux levels.

In [None]:
# Step 1: Read the CSV file
flares2017_2022 = pd.read_csv("/Users/joshuaingram/Main/Projects/SolarFlareNN/data/FlaresGoes161718Wavelet_2.csv")

# Step 2: Identify flare and wavelet indices
flare_indices = flares2017_2022.index[flares2017_2022['Flare'] == 1].tolist()
wavelet_indices = flares2017_2022.index[flares2017_2022['Wavelet'] == 1].tolist()

numFlares = len(flare_indices)

# Step 3: Initialize parameters and matrices
hist1 = 15  # mins before
hist2 = 15  # mins after
nonflareMultiple = 20  # how many non-flares to generate for every flare
cols = hist1 + hist2 + 1 + 1 + 1

FlareProfiles = np.empty((numFlares, cols))
FlareProfiles[:] = np.nan

j = 0
for i in flare_indices:
    if i > hist1 and i <= len(flares2017_2022) - hist2:
        fluxes = flares2017_2022['Flux'].iloc[(i - hist1):(i + hist2 + 1)] / flares2017_2022['Flux'].iloc[i]
        FlareProfiles[j, :-2] = fluxes.values
        FlareProfiles[j, -2] = 1
        FlareProfiles[j, -1] = flares2017_2022['secs_since_2000'].iloc[i]
        j += 1

# Step 4: Generate non-flare profiles
exclude_indices = set(wavelet_indices)
for k in range(1, 6):
    exclude_indices.update([idx + k for idx in wavelet_indices])
    exclude_indices.update([idx - k for idx in wavelet_indices])

nonflare_indices = set(range(hist1 + 1, len(flares2017_2022) - (hist2 + 1))) - exclude_indices
nonflare_indices = random.sample(nonflare_indices, nonflareMultiple * numFlares)

NotFlareProfiles = np.empty((nonflareMultiple * numFlares, cols))
NotFlareProfiles[:] = np.nan

j = 0
for i in nonflare_indices:
    fluxes = flares2017_2022['Flux'].iloc[(i - hist1):(i + hist2 + 1)] / flares2017_2022['Flux'].iloc[i]
    NotFlareProfiles[j, :-2] = fluxes.values
    NotFlareProfiles[j, -2] = 0
    NotFlareProfiles[j, -1] = flares2017_2022['secs_since_2000'].iloc[i]
    j += 1

# Step 5: Combine and save the profiles
Profiles = np.vstack([FlareProfiles, NotFlareProfiles])
Profiles_df = pd.DataFrame(Profiles, columns=[f'x{i+1}' for i in range(hist1 + hist2 + 1)] + ['y', 'SecsSince2000'])
Profiles_df = Profiles_df.dropna()

# Convert 'SecsSince2000' to datetime
Profiles_df['Date'] = pd.to_datetime(Profiles_df['SecsSince2000'], unit='s', origin=pd.Timestamp('2000-01-01 12:00:00'))
Profiles_df.drop('SecsSince2000', axis=1, inplace=True)

# Save to CSV
Profiles_df.to_csv("/Users/joshuaingram/Main/Projects/SolarFlareNN/Profiles2017_2022_case2.csv", index=False)

Profiles_df.head(), Profiles_df.shape

## Model Evaluation Data - Flare Injections
- Inject a select set of flare profiles into different areas of GOES light curves. This data will be used to evaluate the performance of the CNN