### Import Required Libraries and Set Up Environment Variables

In [2]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json
import os
from datetime import datetime
## Load the NASA_API_KEY from the env file
load_dotenv()
NASA_API_KEY = os.getenv('NASA_API_KEY')


### CME Data

In [5]:
# Set the base URL to NASA's DONKI API:
base_url = "https://api.nasa.gov/DONKI/"

# Set the specifier for CMEs:
CME = "CME"

# Search for CMEs published between a begin and end date
startDate = "2013-05-01"
endDate   = "2024-05-01"

# Build URL for CME
query_url = base_url + CME + "?" + "startDate=" + startDate + "&endDate=" + endDate + "&api_key=" + NASA_API_KEY
#print(query_url)

In [15]:
# Make a "GET" request for the CME URL and store it in a variable named cme_response
cme_response = requests.get(query_url).json()


In [16]:
# Convert the response variable to json and store it as a variable named cme_json
cme_json = json.dumps(cme_response)


In [186]:
# Preview the first result in JSON format
# Use json.dumps with argument indent=4 to format data
print(json.dumps(cme_json, indent=4))


"[{\"activityID\": \"2013-05-01T03:12:00-CME-001\", \"catalog\": \"M2M_CATALOG\", \"startTime\": \"2013-05-01T03:12Z\", \"instruments\": [{\"displayName\": \"SOHO: LASCO/C2\"}, {\"displayName\": \"SOHO: LASCO/C3\"}, {\"displayName\": \"STEREO A: SECCHI/COR2\"}, {\"displayName\": \"STEREO B: SECCHI/COR2\"}], \"sourceLocation\": \"\", \"activeRegionNum\": null, \"note\": \"\", \"submissionTime\": \"2013-08-07T16:54Z\", \"versionId\": 1, \"link\": \"https://webtools.ccmc.gsfc.nasa.gov/DONKI/view/CME/2349/-1\", \"cmeAnalyses\": [{\"isMostAccurate\": true, \"time21_5\": \"2013-05-01T07:07Z\", \"latitude\": 12.0, \"longitude\": -120.0, \"halfAngle\": 36.0, \"speed\": 860.0, \"type\": \"C\", \"featureCode\": \"null\", \"imageType\": null, \"measurementTechnique\": \"null\", \"note\": \"\", \"levelOfData\": 0, \"tilt\": null, \"minorHalfWidth\": null, \"speedMeasuredAtHeight\": null, \"submissionTime\": \"2013-08-07T16:54Z\", \"link\": \"https://webtools.ccmc.gsfc.nasa.gov/DONKI/view/CMEAnalys

In [137]:
# Convert cme_json to a Pandas DataFrame 
#cme_df = pd.DataFrame([cme_json])
data = json.loads(cme_json)
cme_df = pd.json_normalize(data)

# Keep only the columns: activityID, startTime, linkedEvents
select_df = cme_df[['activityID', 'startTime', 'linkedEvents']]
select_df.head()

Unnamed: 0,activityID,startTime,linkedEvents
0,2013-05-01T03:12:00-CME-001,2013-05-01T03:12Z,[{'activityID': '2013-05-04T04:52:00-IPS-001'}]
1,2013-05-02T05:24:00-CME-001,2013-05-02T05:24Z,
2,2013-05-02T14:36:00-CME-001,2013-05-02T14:36Z,
3,2013-05-03T18:00:00-CME-001,2013-05-03T18:00Z,
4,2013-05-03T22:36:00-CME-001,2013-05-03T22:36Z,[{'activityID': '2013-05-07T04:37:00-IPS-001'}]


In [138]:
# Notice that the linkedEvents column allows us to identify the corresponding GST
# Remove rows with missing 'linkedEvents' since we won't be able to assign these to GSTs
select_df.dropna(subset=['linkedEvents'], inplace=True)
select_df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  select_df.dropna(subset=['linkedEvents'], inplace=True)


Unnamed: 0,activityID,startTime,linkedEvents
0,2013-05-01T03:12:00-CME-001,2013-05-01T03:12Z,[{'activityID': '2013-05-04T04:52:00-IPS-001'}]
4,2013-05-03T22:36:00-CME-001,2013-05-03T22:36Z,[{'activityID': '2013-05-07T04:37:00-IPS-001'}]
7,2013-05-09T19:29:00-CME-001,2013-05-09T19:29Z,[{'activityID': '2013-05-12T23:30:00-IPS-001'}]
10,2013-05-13T02:54:00-CME-001,2013-05-13T02:54Z,[{'activityID': '2013-05-13T01:53:00-FLR-001'}...
13,2013-05-13T16:18:00-CME-001,2013-05-13T16:18Z,[{'activityID': '2013-05-13T15:40:00-FLR-001'}...


In [191]:
# Notice that the linkedEvents sometimes contains multiple events per row
# Write a nested for loop that iterates first over each row in the cme DataFrame (using the index)
# and then iterates over the values in 'linkedEvents' 
# and adds the elements individually to a list of dictionaries where each row is one element 

# Initialize an empty list to store the expanded rows
expanded_rows = []

# Iterate over each index in the DataFrame
for index, row in select_df.iterrows():
    # Iterate over each dictionary in the list
    for event in row['linkedEvents']:
        # Append a new dictionary to the expanded_rows list for each dictionary item and corresponding 'activityID' and 'startTime' value
        expanded_rows.append({
            'index': index,
            'activityID': row['activityID'],
            'startTime': row['startTime'],
            'event': event
            })
# Create a new DataFrame from the expanded rows
expand_df = pd.DataFrame(expanded_rows)
expand_df.drop('index', axis=1, inplace=True)
expand_df.head()


Unnamed: 0,activityID,startTime,event
0,2013-05-01T03:12:00-CME-001,2013-05-01T03:12Z,{'activityID': '2013-05-04T04:52:00-IPS-001'}
1,2013-05-03T22:36:00-CME-001,2013-05-03T22:36Z,{'activityID': '2013-05-07T04:37:00-IPS-001'}
2,2013-05-09T19:29:00-CME-001,2013-05-09T19:29Z,{'activityID': '2013-05-12T23:30:00-IPS-001'}
3,2013-05-13T02:54:00-CME-001,2013-05-13T02:54Z,{'activityID': '2013-05-13T01:53:00-FLR-001'}
4,2013-05-13T02:54:00-CME-001,2013-05-13T02:54Z,{'activityID': '2013-05-13T04:12:00-SEP-001'}


In [113]:
# Create a function called extract_activityID_from_dict that takes a dict as input such as in linkedEvents
# and verify below that it works as expected using one row from linkedEvents as an example
# Be sure to use a try and except block to handle errors
def extract_activityID_from_dict(input_dict):
        try:
                activity_id = input_dict['activityID']
                return str(activity_id)
        except (ValueError, TypeError) as e:
                print(f"Error extracting activityID: {e}")
                return None

extract_activityID_from_dict(expand_df.loc[0,'event'])        

'2013-05-04T04:52:00-IPS-001'

In [114]:
# Apply this function to each row in the 'linkedEvents' column (you can use apply() and a lambda function)
# and create a new column called 'GST_ActivityID' using loc indexer:
expand_df['GST_ActivityID'] = expand_df['event'].apply(lambda x: extract_activityID_from_dict(x))
expand_df.head()

Unnamed: 0,activityID,startTime,event,GST_ActivityID
0,2013-05-01T03:12:00-CME-001,2013-05-01T03:12Z,{'activityID': '2013-05-04T04:52:00-IPS-001'},2013-05-04T04:52:00-IPS-001
1,2013-05-03T22:36:00-CME-001,2013-05-03T22:36Z,{'activityID': '2013-05-07T04:37:00-IPS-001'},2013-05-07T04:37:00-IPS-001
2,2013-05-09T19:29:00-CME-001,2013-05-09T19:29Z,{'activityID': '2013-05-12T23:30:00-IPS-001'},2013-05-12T23:30:00-IPS-001
3,2013-05-13T02:54:00-CME-001,2013-05-13T02:54Z,{'activityID': '2013-05-13T01:53:00-FLR-001'},2013-05-13T01:53:00-FLR-001
4,2013-05-13T02:54:00-CME-001,2013-05-13T02:54Z,{'activityID': '2013-05-13T04:12:00-SEP-001'},2013-05-13T04:12:00-SEP-001


In [115]:
# Remove rows with missing GST_ActivityID, since we can't assign them to GSTs:
expand_df.dropna(subset=['GST_ActivityID'], inplace=True)

In [119]:
# print out the datatype of each column in this DataFrame:
expand_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 184 entries, 0 to 183
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   activityID      184 non-null    object
 1   startTime       184 non-null    object
 2   event           184 non-null    object
 3   GST_ActivityID  184 non-null    object
dtypes: object(4)
memory usage: 5.9+ KB


In [120]:
# Convert the 'GST_ActivityID' column to string format 
expand_df['GST_ActivityID'] = expand_df['GST_ActivityID'].astype(str)
# Convert startTime to datetime format  
expand_df['startTime'] = pd.to_datetime(expand_df['startTime'])
# Rename startTime to startTime_CME and activityID to cmeID
expand_df = expand_df.rename(columns={'startTime': 'startTime_CME', 'activityID': 'cmeID'})
# Drop linkedEvents
expand_df.drop('event', axis=1, inplace=True)
# Verify that all steps were executed correctly
expand_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 184 entries, 0 to 183
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype              
---  ------          --------------  -----              
 0   cmeID           184 non-null    object             
 1   startTime_CME   184 non-null    datetime64[ns, UTC]
 2   GST_ActivityID  184 non-null    object             
dtypes: datetime64[ns, UTC](1), object(2)
memory usage: 4.4+ KB


In [122]:
# We are only interested in CMEs related to GSTs so keep only rows where the GST_ActivityID column contains 'GST'
# use the method 'contains()' from the str library.  
filt_df = expand_df[expand_df['GST_ActivityID'].str.contains('GST')]
filt_df.head()

Unnamed: 0,cmeID,startTime_CME,GST_ActivityID
21,2013-06-02T20:24:00-CME-001,2013-06-02 20:24:00+00:00,2013-06-07T03:00:00-GST-001
48,2013-09-29T22:40:00-CME-001,2013-09-29 22:40:00+00:00,2013-10-02T03:00:00-GST-001
90,2013-12-04T23:12:00-CME-001,2013-12-04 23:12:00+00:00,2013-12-08T00:00:00-GST-001
148,2014-02-16T14:15:00-CME-001,2014-02-16 14:15:00+00:00,2014-02-19T03:00:00-GST-001
151,2014-02-18T01:25:00-CME-001,2014-02-18 01:25:00+00:00,2014-02-20T03:00:00-GST-001


### GST Data

In [130]:
# Set the base URL to NASA's DONKI API:
base_url = "https://api.nasa.gov/DONKI/"

# Set the specifier for Geomagnetic Storms (GST):
GST = "GST"

# Search for GSTs between a begin and end date
startDate = "2013-05-01"
endDate   = "2024-05-01"

# Build URL for GST
gst_url = base_url + GST + "?" + "startDate=" + startDate + "&endDate=" + endDate + "&api_key=" + NASA_API_KEY
#print(gst_url)

In [131]:
# Make a "GET" request for the GST URL and store it in a variable named gst_response
gst_response = requests.get(gst_url).json()

In [187]:
# Convert the response variable to json and store it as a variable named gst_json
gst_json = json.dumps(gst_response)
# Preview the first result in JSON format
# Use json.dumps with argument indent=4 to format data
print(json.dumps(gst_json, indent=4))


"[{\"gstID\": \"2013-06-01T01:00:00-GST-001\", \"startTime\": \"2013-06-01T01:00Z\", \"allKpIndex\": [{\"observedTime\": \"2013-06-01T01:00Z\", \"kpIndex\": 6.0, \"source\": \"NOAA\"}], \"link\": \"https://webtools.ccmc.gsfc.nasa.gov/DONKI/view/GST/326/-1\", \"linkedEvents\": [{\"activityID\": \"2013-05-31T15:45:00-HSS-001\"}], \"submissionTime\": \"2013-07-15T19:26Z\", \"versionId\": 1}, {\"gstID\": \"2013-06-07T03:00:00-GST-001\", \"startTime\": \"2013-06-07T03:00Z\", \"allKpIndex\": [{\"observedTime\": \"2013-06-07T03:00Z\", \"kpIndex\": 6.0, \"source\": \"NOAA\"}], \"link\": \"https://webtools.ccmc.gsfc.nasa.gov/DONKI/view/GST/330/-1\", \"linkedEvents\": [{\"activityID\": \"2013-06-02T20:24:00-CME-001\"}], \"submissionTime\": \"2013-07-15T19:41Z\", \"versionId\": 1}, {\"gstID\": \"2013-06-29T03:00:00-GST-001\", \"startTime\": \"2013-06-29T03:00Z\", \"allKpIndex\": [{\"observedTime\": \"2013-06-29T03:00Z\", \"kpIndex\": 6.0, \"source\": \"NOAA\"}, {\"observedTime\": \"2013-06-29T06:

In [134]:
# Convert gst_json to a Pandas DataFrame  
data2 = json.loads(gst_json)
gst_df = pd.json_normalize(data2)
# Keep only the columns: activityID, startTime, linkedEvents
select_gst = gst_df[['gstID', 'startTime', 'linkedEvents']]
select_gst.head()

Unnamed: 0,gstID,startTime,linkedEvents
0,2013-06-01T01:00:00-GST-001,2013-06-01T01:00Z,[{'activityID': '2013-05-31T15:45:00-HSS-001'}]
1,2013-06-07T03:00:00-GST-001,2013-06-07T03:00Z,[{'activityID': '2013-06-02T20:24:00-CME-001'}]
2,2013-06-29T03:00:00-GST-001,2013-06-29T03:00Z,
3,2013-10-02T03:00:00-GST-001,2013-10-02T03:00Z,[{'activityID': '2013-09-29T22:40:00-CME-001'}...
4,2013-12-08T00:00:00-GST-001,2013-12-08T00:00Z,[{'activityID': '2013-12-04T23:12:00-CME-001'}...


In [155]:
# Notice that the linkedEvents column allows us to identify the corresponding CME
# Remove rows with missing 'linkedEvents' since we won't be able to assign these to CME
select_gst.dropna(subset=['linkedEvents'], inplace=True)
select_gst.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  select_gst.dropna(subset=['linkedEvents'], inplace=True)


Unnamed: 0,gstID,startTime,linkedEvents
0,2013-06-01T01:00:00-GST-001,2013-06-01T01:00Z,[{'activityID': '2013-05-31T15:45:00-HSS-001'}]
1,2013-06-07T03:00:00-GST-001,2013-06-07T03:00Z,[{'activityID': '2013-06-02T20:24:00-CME-001'}]
3,2013-10-02T03:00:00-GST-001,2013-10-02T03:00Z,[{'activityID': '2013-09-29T22:40:00-CME-001'}...
4,2013-12-08T00:00:00-GST-001,2013-12-08T00:00Z,[{'activityID': '2013-12-04T23:12:00-CME-001'}...
5,2014-02-19T03:00:00-GST-001,2014-02-19T03:00Z,[{'activityID': '2014-02-16T14:15:00-CME-001'}...


In [156]:
# Notice that the linkedEvents sometimes contains multiple events per row
# Use the explode method to ensure that each row is one element. Ensure to reset the index and drop missing values.
exploded_gst = select_gst.explode('linkedEvents')
exploded_gst.dropna(subset=['linkedEvents'], inplace=True)
exploded_gst.reset_index(drop=True,inplace=True)
exploded_gst.head()

Unnamed: 0,gstID,startTime,linkedEvents
0,2013-06-01T01:00:00-GST-001,2013-06-01T01:00Z,{'activityID': '2013-05-31T15:45:00-HSS-001'}
1,2013-06-07T03:00:00-GST-001,2013-06-07T03:00Z,{'activityID': '2013-06-02T20:24:00-CME-001'}
2,2013-10-02T03:00:00-GST-001,2013-10-02T03:00Z,{'activityID': '2013-09-29T22:40:00-CME-001'}
3,2013-10-02T03:00:00-GST-001,2013-10-02T03:00Z,{'activityID': '2013-10-02T01:54:00-IPS-001'}
4,2013-10-02T03:00:00-GST-001,2013-10-02T03:00Z,{'activityID': '2013-10-02T02:47:00-MPC-001'}


In [157]:
# Apply the extract_activityID_from_dict function to each row in the 'linkedEvents' column (you can use apply() and a lambda function)
# and create a new column called 'CME_ActivityID' using loc indexer:
exploded_gst['CME_ActivityID'] = exploded_gst['linkedEvents'].apply(lambda x: extract_activityID_from_dict(x))

# Remove rows with missing CME_ActivityID, since we can't assign them to CMEs:
exploded_gst.dropna(subset=['CME_ActivityID'], inplace=True)
exploded_gst.head()

Unnamed: 0,gstID,startTime,linkedEvents,CME_ActivityID
0,2013-06-01T01:00:00-GST-001,2013-06-01T01:00Z,{'activityID': '2013-05-31T15:45:00-HSS-001'},2013-05-31T15:45:00-HSS-001
1,2013-06-07T03:00:00-GST-001,2013-06-07T03:00Z,{'activityID': '2013-06-02T20:24:00-CME-001'},2013-06-02T20:24:00-CME-001
2,2013-10-02T03:00:00-GST-001,2013-10-02T03:00Z,{'activityID': '2013-09-29T22:40:00-CME-001'},2013-09-29T22:40:00-CME-001
3,2013-10-02T03:00:00-GST-001,2013-10-02T03:00Z,{'activityID': '2013-10-02T01:54:00-IPS-001'},2013-10-02T01:54:00-IPS-001
4,2013-10-02T03:00:00-GST-001,2013-10-02T03:00Z,{'activityID': '2013-10-02T02:47:00-MPC-001'},2013-10-02T02:47:00-MPC-001


In [158]:
# Convert the 'CME_ActivityID' column to string format 
exploded_gst['CME_ActivityID'] = exploded_gst['CME_ActivityID'].astype(str)
# Convert the 'gstID' column to string format 
exploded_gst['gstID'] = exploded_gst['gstID'].astype(str)
# Convert startTime to datetime format  
exploded_gst['startTime'] = pd.to_datetime(exploded_gst['startTime'])
# Rename startTime to startTime_GST 
exploded_gst = exploded_gst.rename(columns={'startTime': 'startTime_GST'})
# Drop linkedEvents
exploded_gst.drop('linkedEvents', axis=1, inplace=True)
# Verify that all steps were executed correctly
exploded_gst.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype              
---  ------          --------------  -----              
 0   gstID           205 non-null    object             
 1   startTime_GST   205 non-null    datetime64[ns, UTC]
 2   CME_ActivityID  205 non-null    object             
dtypes: datetime64[ns, UTC](1), object(2)
memory usage: 4.9+ KB


In [159]:
# We are only interested in GSTs related to CMEs so keep only rows where the CME_ActivityID column contains 'CME'
# use the method 'contains()' from the str library.  
filt_gst = exploded_gst[exploded_gst['CME_ActivityID'].str.contains('CME')]
filt_gst.head()

Unnamed: 0,gstID,startTime_GST,CME_ActivityID
1,2013-06-07T03:00:00-GST-001,2013-06-07 03:00:00+00:00,2013-06-02T20:24:00-CME-001
2,2013-10-02T03:00:00-GST-001,2013-10-02 03:00:00+00:00,2013-09-29T22:40:00-CME-001
5,2013-12-08T00:00:00-GST-001,2013-12-08 00:00:00+00:00,2013-12-04T23:12:00-CME-001
7,2014-02-19T03:00:00-GST-001,2014-02-19 03:00:00+00:00,2014-02-16T14:15:00-CME-001
9,2014-02-20T03:00:00-GST-001,2014-02-20 03:00:00+00:00,2014-02-18T01:25:00-CME-001


### Merge both datatsets

In [162]:
# Now merge both datasets using 'gstID' and 'CME_ActivityID' for gst and 'GST_ActivityID' and 'cmeID' for cme. Use the 'left_on' and 'right_on' specifiers.
merge = pd.merge(filt_gst, filt_df, left_on=['gstID', 'CME_ActivityID'], right_on=['GST_ActivityID', 'cmeID'])
merge.head()


Unnamed: 0,gstID,startTime_GST,CME_ActivityID,cmeID,startTime_CME,GST_ActivityID
0,2013-06-07T03:00:00-GST-001,2013-06-07 03:00:00+00:00,2013-06-02T20:24:00-CME-001,2013-06-02T20:24:00-CME-001,2013-06-02 20:24:00+00:00,2013-06-07T03:00:00-GST-001
1,2013-10-02T03:00:00-GST-001,2013-10-02 03:00:00+00:00,2013-09-29T22:40:00-CME-001,2013-09-29T22:40:00-CME-001,2013-09-29 22:40:00+00:00,2013-10-02T03:00:00-GST-001
2,2013-12-08T00:00:00-GST-001,2013-12-08 00:00:00+00:00,2013-12-04T23:12:00-CME-001,2013-12-04T23:12:00-CME-001,2013-12-04 23:12:00+00:00,2013-12-08T00:00:00-GST-001
3,2014-02-19T03:00:00-GST-001,2014-02-19 03:00:00+00:00,2014-02-16T14:15:00-CME-001,2014-02-16T14:15:00-CME-001,2014-02-16 14:15:00+00:00,2014-02-19T03:00:00-GST-001
4,2014-02-20T03:00:00-GST-001,2014-02-20 03:00:00+00:00,2014-02-18T01:25:00-CME-001,2014-02-18T01:25:00-CME-001,2014-02-18 01:25:00+00:00,2014-02-20T03:00:00-GST-001


In [163]:
# Verify that the new DataFrame has the same number of rows as cme and gst
merge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype              
---  ------          --------------  -----              
 0   gstID           6 non-null      object             
 1   startTime_GST   6 non-null      datetime64[ns, UTC]
 2   CME_ActivityID  6 non-null      object             
 3   cmeID           6 non-null      object             
 4   startTime_CME   6 non-null      datetime64[ns, UTC]
 5   GST_ActivityID  6 non-null      object             
dtypes: datetime64[ns, UTC](2), object(4)
memory usage: 416.0+ bytes


### Computing the time it takes for a CME to cause a GST

In [164]:
# Compute the time diff between startTime_GST and startTime_CME by creating a new column called `timeDiff`.
merge['timeDiff'] = merge['startTime_GST'] - merge['startTime_CME']
merge.head()

Unnamed: 0,gstID,startTime_GST,CME_ActivityID,cmeID,startTime_CME,GST_ActivityID,timeDiff
0,2013-06-07T03:00:00-GST-001,2013-06-07 03:00:00+00:00,2013-06-02T20:24:00-CME-001,2013-06-02T20:24:00-CME-001,2013-06-02 20:24:00+00:00,2013-06-07T03:00:00-GST-001,4 days 06:36:00
1,2013-10-02T03:00:00-GST-001,2013-10-02 03:00:00+00:00,2013-09-29T22:40:00-CME-001,2013-09-29T22:40:00-CME-001,2013-09-29 22:40:00+00:00,2013-10-02T03:00:00-GST-001,2 days 04:20:00
2,2013-12-08T00:00:00-GST-001,2013-12-08 00:00:00+00:00,2013-12-04T23:12:00-CME-001,2013-12-04T23:12:00-CME-001,2013-12-04 23:12:00+00:00,2013-12-08T00:00:00-GST-001,3 days 00:48:00
3,2014-02-19T03:00:00-GST-001,2014-02-19 03:00:00+00:00,2014-02-16T14:15:00-CME-001,2014-02-16T14:15:00-CME-001,2014-02-16 14:15:00+00:00,2014-02-19T03:00:00-GST-001,2 days 12:45:00
4,2014-02-20T03:00:00-GST-001,2014-02-20 03:00:00+00:00,2014-02-18T01:25:00-CME-001,2014-02-18T01:25:00-CME-001,2014-02-18 01:25:00+00:00,2014-02-20T03:00:00-GST-001,2 days 01:35:00


In [166]:
# Use describe() to compute the mean and median time 
# that it takes for a CME to cause a GST. 
summary = merge.describe()
summary

Unnamed: 0,timeDiff
count,6
mean,2 days 19:06:30
std,0 days 19:18:34.196247960
min,2 days 01:35:00
25%,2 days 06:26:15
50%,2 days 14:40:00
75%,2 days 22:44:45
max,4 days 06:36:00


### Exporting data in csv format

In [167]:
# Export data to CSV without the index
merge.to_csv('CME_GST_data.csv', index=False)