In [1]:
# Step 1: Mount Google Drive so the notebook can read and write files stored there.
# The mount point '/content/drive' is the root of every data path used in this notebook.
from google.colab import drive
drive.mount('/content/drive')

import os
import glob
import csv
import pandas as pd


Mounted at /content/drive


In [None]:
# Step: Read the raw text of a single CMN file
def convert_cmn_to_txt(Cmn_file):
  """Read a CMN file and return its entire content as one string.

  Despite the name, nothing is converted or written to disk: the file at
  ``Cmn_file`` is opened in text mode and its content is returned unchanged.

  Args:
    Cmn_file: Path of the file to read.

  Returns:
    str: The full text content of the file.

  Raises:
    Exception: Any error raised while opening or reading the file is
      reported with ``print`` and then re-raised unchanged.
  """
  try:
    # Open the file and read its content in one go
    with open(Cmn_file, 'r') as file:
      return file.read()
  except Exception as e:
    # Report which file failed, then propagate the real error. Falling through
    # to a return here would only fail later with an unrelated
    # UnboundLocalError that hides the actual cause.
    print(f"Error processing {Cmn_file}: {str(e)}")
    raise

In [None]:
def Preprocess_Cmn_into_CSV(file_dir,CSV_output_folder):
  """Convert one CMN text file into a CSV file with Year/Month/Day columns.

  The date is taken from the file name, which is split on "-": the second,
  third and fourth parts are used as Year, Month and Day (for example
  'khl2001-2023-01-01.Cmn' gives 2023 / 01 / 01). After stripping the text,
  the fifth line (index 4) is used as the whitespace-separated column header
  and every following non-blank line as a whitespace-separated data row.

  Args:
    file_dir: Path of the CMN file to convert.
    CSV_output_folder: Folder that receives the CSV file. A trailing path
      separator is accepted but not required. The folder is created if it
      does not exist.

  Returns:
    str: Path of the CSV file that was written
      (``<CSV_output_folder>/<file name without extension>.csv``).

  Raises:
    IndexError: If the file name has fewer than four "-"-separated parts, or
      the file content has fewer than five lines.
  """
  file_text = convert_cmn_to_txt(file_dir)

  # Drop the extension whatever its spelling ('.Cmn', '.cmn', '.CMN', ...).
  # A case-sensitive replace would leave the extension glued to the Day value
  # and to the output file name.
  filename_base = os.path.splitext(os.path.basename(file_dir))[0]

  # Get the Year, Month and Day from the file name
  time_stamp = filename_base.split("-")
  Year = time_stamp[1]
  Month = time_stamp[2]
  Day = time_stamp[3]

  # Process the data into lines
  lines = file_text.strip().split("\n")
  header = ["Year", "Month", "Day"] + lines[4].split()
  # Skip blank lines: they would otherwise become rows holding only the date.
  data_rows = [
      [Year, Month, Day] + line.split()
      for line in lines[5:]
      if line.strip()
  ]

  # Write to a CSV file
  if CSV_output_folder:
    os.makedirs(CSV_output_folder, exist_ok=True)
  # os.path.join yields the same path with or without a trailing separator.
  output_file = os.path.join(CSV_output_folder, filename_base + ".csv")
  with open(output_file, mode="w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(header)  # Write the header row
    writer.writerows(data_rows)  # Write the data rows
  print(f"CSV file '{filename_base}' created successfully.")
  return output_file

In [None]:
def Filter_data(csv_file, lat_range=(20.00, 27.00), lon_range=(88, 93)):
  """Drop unneeded columns and keep only rows inside a lat/lon box.

  The CSV file is read, the columns 'MJdatet', 'PRN', 'Az', 'Ele' and 'S4'
  are removed, rows whose 'Lat' and 'Lon' fall outside the given ranges are
  discarded, and the result is written back to the SAME path (the original
  file is overwritten).

  Calling the function again on an already filtered file is safe: columns
  that are no longer present are simply ignored.

  Args:
    csv_file: Path of the CSV file to filter in place. It must contain
      'Lat' and 'Lon' columns.
    lat_range: ``(min, max)`` latitude bounds, both inclusive.
      Defaults to ``(20.00, 27.00)``.
    lon_range: ``(min, max)`` longitude bounds, both inclusive.
      Defaults to ``(88, 93)``.

  Returns:
    None
  """
  load_data = pd.read_csv(csv_file)

  # errors='ignore' keeps a re-run from failing with KeyError once the
  # columns have already been dropped by a previous run.
  trimmed = load_data.drop(
      columns=['MJdatet', 'PRN', 'Az', 'Ele', 'S4'], errors='ignore'
  )

  lat_min, lat_max = lat_range
  lon_min, lon_max = lon_range
  # Series.between is inclusive on both ends, i.e. min <= value <= max.
  in_region = (
      trimmed['Lat'].between(lat_min, lat_max)
      & trimmed['Lon'].between(lon_min, lon_max)
  )
  filtered_data = trimmed[in_region]

  filtered_data.to_csv(csv_file, index=False)
  print(f"Filtered CSV file '{csv_file}' filtered and replaced successfully.")


In [None]:
#combine all the csv file
def combined_data(CSV_output_folder, destination_path):
  """Concatenate every CSV file of a folder into one combined CSV file.

  Files whose name ends with '.csv' are read in sorted file-name order, so
  the row order of the result is the same on every run. The combined frame
  is written to ``combined_data_2022_2023.csv`` inside ``destination_path``.

  Args:
    CSV_output_folder: Folder containing the CSV files to combine.
    destination_path: Folder that receives the combined file. A trailing
      path separator is accepted but not required. The folder is created if
      it does not exist.

  Returns:
    None

  Raises:
    ValueError: If ``CSV_output_folder`` contains no '.csv' file.
  """
  # os.listdir returns names in arbitrary order; sort for a reproducible result.
  csv_names = sorted(
      name for name in os.listdir(CSV_output_folder) if name.endswith('.csv')
  )
  if not csv_names:
    # pd.concat would raise ValueError on an empty list anyway; raise the same
    # type with a message that names the actual problem.
    raise ValueError(f"No CSV files found in '{CSV_output_folder}'; nothing to combine.")

  # List to hold each DataFrame
  dfs = []
  for filename in csv_names:
    file_path = os.path.join(CSV_output_folder, filename)
    # Read the CSV file and append to the list
    dfs.append(pd.read_csv(file_path))
    print(file_path+"  Added to combine csv file")

  # Concatenate all DataFrames into one
  merged = pd.concat(dfs, ignore_index=True)

  # Save the combined data to a new CSV file
  if destination_path:
    os.makedirs(destination_path, exist_ok=True)
  output_path = os.path.join(destination_path, 'combined_data_2022_2023.csv')
  merged.to_csv(output_path, index=False)
  print("Combined all csv file successfully")


# Main Workflow

In [None]:
# Input folder on the mounted Drive that is searched for CMN files
# (update the path if the folder is elsewhere).
CMN_folder_path = '/content/drive/MyDrive/Sparrso_Data/CNM_File_2022_2023'  # Update this to the correct path
# Output folder that receives one CSV file per CMN file. The trailing '/' is
# intentional: it keeps the path valid even where it is joined to a file name
# by plain string concatenation.
CSV_output_folder = "/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/"

In [None]:
# Convert every CMN file found in CMN_folder_path into a CSV file in CSV_output_folder.
# NOTE(review): the pattern is spelled '*.Cmn'; on a case-sensitive filesystem
# files named '*.cmn' or '*.CMN' would presumably not be matched — confirm the
# spelling of the files on Drive.
cmn_files = glob.glob(os.path.join(CMN_folder_path, '*.Cmn'))  # Only files ending in '.Cmn'
for file in cmn_files:
  # output_file is overwritten on every iteration; after the loop it holds the last CSV path.
  output_file = Preprocess_Cmn_into_CSV(file,CSV_output_folder)



CSV file 'khl2001-2023-01-01' created successfully.
CSV file 'khl2002-2023-01-02' created successfully.
CSV file 'khl2003-2023-01-03' created successfully.
CSV file 'khl2004-2023-01-04' created successfully.
CSV file 'khl2005-2023-01-05' created successfully.
CSV file 'khl2006-2023-01-06' created successfully.
CSV file 'khl2007-2023-01-07' created successfully.
CSV file 'khl2008-2023-01-08' created successfully.
CSV file 'khl2009-2023-01-09' created successfully.
CSV file 'khl2010-2023-01-10' created successfully.
CSV file 'khl2011-2023-01-11' created successfully.
CSV file 'khl2012-2023-01-12' created successfully.
CSV file 'khl2013-2022-01-13' created successfully.
CSV file 'khl2013-2023-01-13' created successfully.
CSV file 'khl2014-2022-01-14' created successfully.
CSV file 'khl2014-2023-01-14' created successfully.
CSV file 'khl2015-2022-01-15' created successfully.
CSV file 'khl2015-2023-01-15' created successfully.
CSV file 'khl2016-2022-01-16' created successfully.
CSV file 'kh

In [None]:
# Filter every CSV file by location (lat between 20.00 and 27.00, lon between 88 and 93)
# and drop the unnecessary columns. Each file is overwritten in place by Filter_data.
csc_files = glob.glob(os.path.join(CSV_output_folder, '*.csv'))  # Only .csv files
for file in csc_files:
  Filter_data(file)

Filtered CSV file '/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2001-2023-01-01.csv' filtered and replaced successfully.
Filtered CSV file '/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2002-2023-01-02.csv' filtered and replaced successfully.
Filtered CSV file '/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2003-2023-01-03.csv' filtered and replaced successfully.
Filtered CSV file '/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2004-2023-01-04.csv' filtered and replaced successfully.
Filtered CSV file '/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2005-2023-01-05.csv' filtered and replaced successfully.
Filtered CSV file '/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2006-2023-01-06.csv' filtered and replaced successfully.
Filtered CSV file '/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2007-2023-01-07.csv' filtered and replaced successfully.
Filtered CSV file '/content/drive/MyDrive/Sparrso_Data/CSV_FIL

In [None]:
# Combine all the filtered CSV files into a single file,
# 'combined_data_2022_2023.csv', inside destination_path.
# The trailing '/' on the path is intentional: it keeps the path valid even
# where it is joined to the file name by plain string concatenation.
destination_path = '/content/drive/MyDrive/Sparrso_Data/Combined_data/'
combined_data(CSV_output_folder, destination_path)

/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2001-2023-01-01.csvAdded to combine csv file
/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2002-2023-01-02.csvAdded to combine csv file
/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2003-2023-01-03.csvAdded to combine csv file
/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2004-2023-01-04.csvAdded to combine csv file
/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2005-2023-01-05.csvAdded to combine csv file
/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2006-2023-01-06.csvAdded to combine csv file
/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2007-2023-01-07.csvAdded to combine csv file
/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2008-2023-01-08.csvAdded to combine csv file
/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2009-2023-01-09.csvAdded to combine csv file
/content/drive/MyDrive/Sparrso_Data/CSV_FILE_2022_2023/khl2010-2023-01-10

In [None]:
# Load the combined CSV file written by combined_data() and display it.
# NOTE(review): the recorded output of this cell looks like a describe()-style
# summary (count/mean/std/...) rather than the frame itself — confirm which
# view was intended.
data = pd.read_csv('/content/drive/MyDrive/Sparrso_Data/Combined_data/combined_data_2022_2023.csv')
data

Unnamed: 0,Year,Month,Day,Time,Lat,Lon,Stec,Vtec
count,13918140.0,13918140.0,13918140.0,13918140.0,13918140.0,13918140.0,13918140.0,13918140.0
mean,2022.505,6.629871,15.84615,12.05396,23.58987,90.32984,36.0044,32.91168
std,0.499971,3.403491,8.772295,6.956168,1.712626,1.397806,33.81751,28.0161
min,2022.0,1.0,1.0,-24.0,20.0,88.0,-56.98,-45.7
25%,2022.0,4.0,8.0,6.0125,22.236,89.167,8.53,9.51
50%,2023.0,7.0,16.0,12.12917,23.815,90.192,27.04,24.95
75%,2023.0,10.0,23.0,18.0875,25.091,91.513,56.8,49.12
max,2023.0,12.0,31.0,23.99583,27.0,93.0,219.19,149.13
