### Data request function

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd

def collect_data_from_online_and_preprocess(start_date,end_date):
  """Fetch daily OMNIWeb data for a date range, save it as CSV and load it.

  A POST request for variables 24 and 30 at daily resolution is sent to the
  OMNIWeb ``nx1.cgi`` endpoint. The text inside the first ``<pre>`` tag of the
  reply is split into a header and data rows, written to ``plasma_data.csv``
  in the current working directory (overwriting any previous file) and read
  back with pandas. Progress messages and a short summary are printed.

  Args:
    start_date: Start of the range as YYYYMMDD; converted with ``str()``.
    end_date: End of the range as YYYYMMDD; converted with ``str()``.

  Returns:
    The pandas.DataFrame read from the generated CSV. The two data columns
    are named from the variable descriptions found in the reply; the rest of
    this file indexes them as 'SW_Plasma' and 'sigma-n'.

  Raises:
    RuntimeError: If the server answers with a status code other than 200.
    ValueError: If the reply has no ``<pre>`` block, the block is too short
      to hold the expected header lines, or the column header line lacks the
      "1"/"2" placeholders.
    requests.exceptions.RequestException: On network failure or timeout.
  """
  # OMNIWeb endpoint for data retrieval
  url = "https://omniweb.gsfc.nasa.gov/cgi/nx1.cgi"
  output_file = "plasma_data.csv"
  # Without a timeout requests waits forever on a stalled connection.
  timeout_seconds = 120

  # Define request parameters
  params = {
    "activity": "retrieve",          # Correct activity value for list data
    "res": "day",                    # Daily-averaged data
    "start_date": str(start_date),   # Start date (YYYYMMDD)
    "end_date": str(end_date),       # End date (YYYYMMDD)
    "vars": ["24", "30"],            # Flow Speed/ plasma speed(code: 24) and Sigma-Np(code:30)
    "sc_id": "ACE",                  # Spacecraft ID (e.g., ACE, WIND)
    "spacecraft": "omni2",
  }

  # Send POST request
  print("1. Data Requesting please wait")
  print("*************************************************")
  response = requests.post(url, data=params, timeout=timeout_seconds)

  # Stop here on failure: an error page cannot be parsed as a data listing.
  if response.status_code != 200:
    print(f"Error: Unable to retrieve data. Status Code: {response.status_code}")
    print(f"Response Content: {response.text}")
    raise RuntimeError(
      f"OMNIWeb request failed with status code {response.status_code}"
    )

  print("2. Request was successful!")
  print(f"Response Status Code: {response.status_code}")
  print("*************************************************")
  # Only the first 350 characters are shown to keep the output short.
  print(f"Response Content: \n{response.text[:350]}")

  print("3. Data Processing please wait")
  print("*************************************************")
  # Parse the HTML content using BeautifulSoup
  soup = BeautifulSoup(response.text, "html.parser")

  # The listing is expected inside the first <pre> tag of the page.
  pre_tag = soup.find("pre")
  if pre_tag is None:
    raise ValueError("No <pre> block found in the OMNIWeb response")

  # Process the data into lines
  lines = pre_tag.text.strip().split("\n")
  if len(lines) < 5:
    raise ValueError(
      f"Unexpected OMNIWeb listing: only {len(lines)} line(s) inside <pre>"
    )

  # NOTE(review): the indices below assume that, after strip(), lines[1] and
  # lines[2] describe the two requested variables, lines[4] is the column
  # header and the data starts at lines[5] - confirm against a live response.
  # First variable: 3rd and 4th space-separated tokens joined with "_".
  data_colmn1 = "_".join(lines[1].split(" ")[2:4])
  # Second variable: 3rd token with its last character dropped.
  data_colmn2 = lines[2].split(" ")[2][:-1]

  # Swap the numeric placeholders "1"/"2" in the header for the real names.
  header = lines[4].split()
  header.remove("1")
  header.remove("2")
  header.insert(3, data_colmn1)
  header.insert(4, data_colmn2)
  data_rows = [line.split() for line in lines[5:]]

  # Write to a CSV file
  with open(output_file, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerow(header)      # Write the header row
    writer.writerows(data_rows)  # Write the data rows
  print(f"4. Data successfully written to {output_file}")
  print("*************************************************")

  # Load the CSV back into memory so pandas infers numeric column types.
  data = pd.read_csv(output_file)
  print("5. Data Loaded Sucessfully to Memory")
  print("*************************************************")
  print("***************First Some Row********************")
  print("*************************************************")
  print(data.head())
  print("*************************************************")
  print("*************Data Description fresh data*********")
  print("*************************************************")
  print(data.describe())
  return data



### Data Cleaning Function

In [None]:
def data_cleaning(data):
  """Return a copy of ``data`` without the placeholder rows.

  A row is dropped when its 'SW_Plasma' value equals 9999.0 or its 'sigma-n'
  value equals 999.9 (presumably the source's missing-data markers). Both
  conditions are applied together; the input frame itself is not modified.
  A ``describe()`` summary of the result is printed.

  Args:
    data: pandas.DataFrame with 'SW_Plasma' and 'sigma-n' columns.

  Returns:
    A new pandas.DataFrame holding only the rows that pass both checks.
  """
  print("*************************************************")
  print("**********************Cleaning Data**************")
  print("*************************************************")

  # Remove rows where 'SW_Plasma' is 9999.0
  print("Remove rows where 'SW_Plasma' is 9999.0")
  valid_speed = data['SW_Plasma'] != 9999.0

  # Remove rows where 'sigma-n' is 999.9
  print("Remove rows where 'sigma-n' is 999.9")
  valid_sigma = data['sigma-n'] != 999.9

  # Combine both masks so neither filter overrides the other.
  data_cleaned = data[valid_speed & valid_sigma]

  print("*************************************************")
  print("*************Data Description Cleaned Data*******")
  print("*************************************************")
  print(data_cleaned.describe())
  return data_cleaned

## Scatter plotting function

In [None]:
import matplotlib.pyplot as plt

def scatter_plot(data, type_data):
  """Display a scatter plot of 'SW_Plasma' (y-axis) against 'sigma-n' (x-axis).

  Args:
    data: pandas.DataFrame with 'SW_Plasma' and 'sigma-n' columns.
    type_data: Text appended verbatim to the end of the plot title.
  """
  separator = "*************************************************"
  for banner_line in (
    separator,
    "*************Scatter plotting *******************",
    separator,
  ):
    print(banner_line)

  # Create a scatter plot of Solar Wind Speed vs Density
  plt.figure(figsize=(8, 6))
  speed_values = data['SW_Plasma']
  sigma_values = data['sigma-n']
  plt.scatter(sigma_values, speed_values, color='blue', alpha=0.5)

  plot_title = 'Scatter Plot: Solar Wind Speed vs Density' + type_data
  plt.title(plot_title)
  plt.xlabel('Density (N/cm^3)')
  plt.ylabel('Solar Wind Speed (km/s)')
  plt.grid(True)
  plt.show()


## Compute correlation function

In [None]:
# Compute the correlation coefficient between 'SW Plasma Speed' and 'sigma-n'
def compute_correlation(data):
  """Print and return the correlation between 'SW_Plasma' and 'sigma-n'.

  The coefficient comes from ``Series.corr`` with its default settings.

  Args:
    data: pandas.DataFrame with 'SW_Plasma' and 'sigma-n' columns.

  Returns:
    The correlation coefficient as returned by ``Series.corr``. It is
    returned in addition to being printed so callers can reuse the number.
  """
  print("*************************************************")
  print("*************Describing Correlation**************")
  print("*************************************************")
  correlation = data['SW_Plasma'].corr(data['sigma-n'])
  print(f"Correlation between Solar Wind Speed and Density: {correlation:.2f}")
  return correlation

## Separate Data based on Solar wind Speed

In [None]:
def separte_corelation_and_polt_based_on_solar_wind(data):
  """Plot and correlate the data separately for fast, medium and slow wind.

  Rows are grouped by their 'SW_Plasma' value: above 700 is fast, 445 to 700
  inclusive is medium, below 445 is slow. For each group, in that order, a
  banner is printed, then scatter_plot and compute_correlation are called.

  Args:
    data: pandas.DataFrame with 'SW_Plasma' and 'sigma-n' columns.
  """
  speed = data['SW_Plasma']

  # Build every subset up front; they are processed fast -> medium -> slow.
  groups = [
    ("Fast", data[speed > 700]),
    ("Medium", data[(speed >= 445) & (speed <= 700)]),
    ("Slow", data[speed < 445]),
  ]

  separator = "*************************************************"
  for label, subset in groups:
    print(separator)
    print(f"**Describing Corelationa for {label} Solar Wind**")
    print(separator)
    scatter_plot(subset, f"({label} Solar Wind)")
    compute_correlation(subset)


## Main function

In [None]:
# Run the whole pipeline for the range 19960101 to 20081231.
# The download step (re)writes plasma_data.csv in the working directory.
data = collect_data_from_online_and_preprocess(19960101 ,20081231) #Start date (YYYYMMDD) End date (YYYYMMDD)
# Drop the 9999.0 / 999.9 placeholder rows before plotting or statistics.
plasma_data = data_cleaning(data);
# Scatter plot and correlation over the whole cleaned range.
scatter_plot(plasma_data,"(Overal)")
compute_correlation(plasma_data)
# Repeat the plot and correlation per wind-speed group (fast / medium / slow).
separte_corelation_and_polt_based_on_solar_wind(plasma_data)


In [None]:
# Run the same pipeline for the range 20081231 to 20191231.
# This rebinds `data` / `plasma_data` and overwrites plasma_data.csv.
# NOTE(review): the start date equals the previous cell's end date; whether
# that day ends up in both periods depends on how the endpoint treats the
# bounds - confirm.
data = collect_data_from_online_and_preprocess(20081231 ,20191231) #Start date (YYYYMMDD) End date (YYYYMMDD)
# Drop the 9999.0 / 999.9 placeholder rows before plotting or statistics.
plasma_data = data_cleaning(data);
# Scatter plot and correlation over the whole cleaned range.
scatter_plot(plasma_data,"(Overal)")
compute_correlation(plasma_data)
# Repeat the plot and correlation per wind-speed group (fast / medium / slow).
separte_corelation_and_polt_based_on_solar_wind(plasma_data)