In [6]:
import csv
import os
from datetime import datetime, timedelta

def parse_ionex_file(input_file):
    with open(input_file, 'r') as file:
        lines = file.readlines()
    
    data_blocks = []
    in_data_block = False
    current_block = []
    
    i = 0
    while i < len(lines):
        line = lines[i].strip()
        if 'LAT/LON1/LON2' in line:
            in_data_block = True
            current_block = []
            # Take the next 5 lines
            for j in range(i + 1, i + 6):
                if j < len(lines):
                    current_block.extend(lines[j].strip().split())
            data_blocks.extend(current_block)  # Extend data_blocks directly with current_block
            # Move 5 lines ahead
            i += 5
        elif 'END OF TEC MAP' in line:
            data_blocks.append(None)  # Insert a None to indicate the end of a TEC map block (optional)
        i += 1

    return data_blocks

# Directory containing the IONEX files
input_directory = r'C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\PROJECT\surge\Bijoy_work\Taiwan_2024'
output_file_path = os.path.join(input_directory, 'one.csv')

# Initialize a list to hold all columns
all_columns = []

# Date range
start_date = datetime.strptime('240214', '%y%m%d')
end_date = datetime.strptime('240405', '%y%m%d')

# Loop through the date range
current_date = start_date
while current_date <= end_date:
    input_file_name = current_date.strftime('%y%m%dt.txt')
    input_file_path = os.path.join(input_directory, input_file_name)
    
    if os.path.exists(input_file_path):
        data_blocks = parse_ionex_file(input_file_path)
        all_columns.append(data_blocks)
        print(f'Processed {input_file_name}')
    else:
        print(f'{input_file_name} does not exist')
    
    current_date += timedelta(days=1)

# Divide all elements by 10 and transpose the list of lists to align data into columns
max_length = max(len(column) for column in all_columns)
transposed_data = []

for row_index in range(max_length):
    row = []
    for column in all_columns:
        if row_index < len(column):
            try:
                value = float(column[row_index]) / 10  # Divide by 10
            except ValueError:
                value = column[row_index]  # In case of non-numeric values, keep the original value
            row.append(value)
        else:
            row.append('')
    transposed_data.append(row)

# Write the transposed data to a single CSV file
with open(output_file_path, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerows(transposed_data)

print(f'All data written to {output_file_path}')


Processed 240214t.txt
Processed 240215t.txt
Processed 240216t.txt
Processed 240217t.txt
Processed 240218t.txt
Processed 240219t.txt
Processed 240220t.txt
Processed 240221t.txt
Processed 240222t.txt
Processed 240223t.txt
Processed 240224t.txt
Processed 240225t.txt
Processed 240226t.txt
Processed 240227t.txt
Processed 240228t.txt
Processed 240229t.txt
Processed 240301t.txt
Processed 240302t.txt
Processed 240303t.txt
Processed 240304t.txt
Processed 240305t.txt
Processed 240306t.txt
Processed 240307t.txt
Processed 240308t.txt
Processed 240309t.txt
Processed 240310t.txt
Processed 240311t.txt
Processed 240312t.txt
Processed 240313t.txt
Processed 240314t.txt
Processed 240315t.txt
Processed 240316t.txt
Processed 240317t.txt
Processed 240318t.txt
Processed 240319t.txt
Processed 240320t.txt
Processed 240321t.txt
Processed 240322t.txt
Processed 240323t.txt
Processed 240324t.txt
Processed 240325t.txt
Processed 240326t.txt
Processed 240327t.txt
Processed 240328t.txt
Processed 240329t.txt
Processed 

In [4]:
import pandas as pd
import numpy as np

# Function to calculate the median of every 3 consecutive elements in a row
def calculate_medians(row):
    medians = []
    for i in range(len(row) - 14):
        window = row[i:i + 15]
        ub = np.median(window) + 1.34 * np.std(window)  # Upper bound
        medians.append(ub)
    for i in range(len(row) - 14):
        window = row[i:i + 15]
        medians.append(np.median(window) - 1.34 * np.std(window))  # Lower bound
        
    return medians

# Load the CSV file
file_path = r'C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\one.csv'
data = pd.read_csv(file_path, header=None)

# Calculate medians for each row and store them in a list of lists
all_medians = []
for idx, row in data.iterrows():
    medians_of_row = calculate_medians(row)
    all_medians.append(medians_of_row)

# Convert the list of lists into a new DataFrame
medians_df = pd.DataFrame(all_medians)

# Concatenate the original DataFrame with the new medians DataFrame
updated_data = pd.concat([data, medians_df], axis=1)

# Save the updated DataFrame to a new CSV file
output_path = r'C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\output.csv'
updated_data.to_csv(output_path, index=False)

print("Medians of every 3 consecutive elements in each row have been calculated and appended.")
print("New CSV file saved to:", output_path)

Medians of every 3 consecutive elements in each row have been calculated and appended.
New CSV file saved to: C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\output.csv


In [2]:
import pandas as pd
file_path = r'C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\one.csv'
data = pd.read_csv(file_path, header=None)

In [5]:
data


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,42,43,44,45,46,47,48,49,50,51
0,19.8,14.5,18.3,20.2,20.0,15.9,17.4,15.0,17.8,16.7,...,19.8,25.6,28.2,28.5,22.7,25.7,18.3,19.0,19.2,16.6
1,20.1,14.6,18.5,20.3,20.2,16.1,17.6,15.2,18.0,16.8,...,20.0,25.8,28.4,28.6,22.9,25.9,18.4,19.1,19.2,16.7
2,20.4,14.6,18.7,20.3,20.5,16.2,17.8,15.2,18.2,16.9,...,20.2,25.9,28.5,28.8,23.2,26.1,18.5,19.3,19.3,16.9
3,20.7,14.7,18.9,20.3,20.6,16.4,17.9,15.4,18.3,17.0,...,20.3,26.0,28.6,28.9,23.4,26.3,18.6,19.3,19.4,17.0
4,20.9,14.7,19.0,20.3,20.8,16.5,18.0,15.4,18.5,17.1,...,20.5,26.1,28.7,29.0,23.5,26.5,18.7,19.4,19.4,17.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124387,27.2,18.0,25.6,20.5,19.8,20.2,17.5,22.5,23.6,21.6,...,18.2,21.6,15.5,11.9,17.6,14.6,14.1,13.9,18.1,11.7
124388,27.3,18.1,25.7,20.4,19.9,20.2,17.6,22.6,23.8,21.7,...,18.3,21.7,15.6,11.9,17.6,14.7,14.1,13.9,18.1,11.7
124389,27.4,18.2,25.9,20.4,19.9,20.1,17.8,22.7,23.9,21.8,...,18.4,21.8,15.8,11.9,17.6,14.7,14.1,14.0,18.1,11.7
124390,27.6,18.2,25.9,20.3,20.1,20.1,17.8,22.8,24.1,21.8,...,18.5,21.9,15.9,11.9,17.6,14.7,14.1,14.1,18.1,11.7


In [None]:
import pandas as pd
import numpy as np

# Function to calculate the median of every 15 consecutive elements in a row
def calculate_medians(row):
    upper_bounds = []
    lower_bounds = []
    # Just to make sure that we don't consider the last window.
    # so put len(row) - 15
    for i in range(len(row) - 15):
        window = row[i:i + 15]
        median = np.median(window)
        std_dev = np.std(window)
        upper_bound = median + 1.34 * std_dev  # Upper bound
        lower_bound = median - 1.34 * std_dev  # Lower bound
        upper_bounds.append(upper_bound)
        lower_bounds.append(lower_bound)
    return upper_bounds, lower_bounds

# Load the CSV file
file_path = r'C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\one.csv'
data = pd.read_csv(file_path, header=None)

# Calculate medians for each row and store them in a list of lists
all_upper_bounds = []
all_lower_bounds = []
for idx, row in data.iterrows():
    upper_bounds, lower_bounds = calculate_medians(row)
    all_upper_bounds.append(upper_bounds)
    all_lower_bounds.append(lower_bounds)

# Convert the list of lists into new DataFrames
upper_bounds_df = pd.DataFrame(all_upper_bounds)
lower_bounds_df = pd.DataFrame(all_lower_bounds)

# Concatenate the original DataFrame with the new medians DataFrame
updated_data = pd.concat([data, upper_bounds_df, lower_bounds_df], axis=1)

# Save the updated DataFrame to a new CSV file
output_path = r'C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\Final_output.csv'
updated_data.to_csv(output_path, index=False)

print("Upper and lower bounds for medians of every 15 consecutive elements in each row have been calculated and appended.")
print("New CSV file saved to:", output_path)


Enter the Latitude: 86
Enter the Longitude: 101
85.0 100


In [None]:
import pandas as pd
import numpy as np

# Take input for two floating-point numbers in Jupyter Notebook
lat_start = float(input("Enter the starting latitude (e.g., 87.5): "))
lat_end = float(input("Enter the ending latitude (e.g., -87.5): "))

# Define the number of points
num_lat_points = 71
num_lon_points = 73

# Calculate the step size for latitude and longitude
latitudes = np.linspace(lat_start, lat_end, num_lat_points)
longitudes = np.linspace(-180, 180, num_lon_points)

# Generate all combinations of latitude and longitude
lat_lon_combinations = [(lat, lon) for lat in latitudes for lon in longitudes]

# Create a DataFrame from the combinations
df = pd.DataFrame(lat_lon_combinations, columns=['Latitude', 'Longitude'])

# Repeat the DataFrame 24 times
df_repeated = pd.concat([df] * 24, ignore_index=True)

# Save the DataFrame to a CSV file
output_path = r'C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\lat_lon_repeated.csv'
df_repeated.to_csv(output_path, index=False)

print("Latitude and Longitude combinations have been repeated 24 times and saved to CSV.")


In [14]:
# Dataset Imported and their upper and lower bound created..
# Remove the first 15 columns from TEc


In [15]:
# Program to generate all Latitude and longitude

import pandas as pd
import numpy as np

# Define the number of points
num_lat_points = 71
num_lon_points = 73

# Calculate the step size for latitude and longitude
latitudes = np.linspace(87.5, -87.5, num_lat_points)
longitudes = np.linspace(-180, 180, num_lon_points)

# Generate all combinations of latitude and longitude
lat_lon_combinations = [(lat, lon) for lat in latitudes for lon in longitudes]

# Create a DataFrame from the combinations
df = pd.DataFrame(lat_lon_combinations, columns=['Latitude', 'Longitude'])

# Repeat the DataFrame 24 times
df_repeated = pd.concat([df] * 24, ignore_index=True)

# Save the DataFrame to a CSV file
# output_path = r'C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\lat_lon_repeated.csv'
# df_repeated.to_csv(output_path, index=False)

# print("Latitude and Longitude combinations have been repeated 24 times and saved to CSV.")



In [16]:
# Porgram to get desired Latitude and Longitude..
# Generating Nearest Latitude and Longitude.

Lat = float(input("Enter the Latitude: "))
Lon = float(input("Enter the Longitude: "))

# Find nearest multiple of 2.5 from latitude

Lat = round(Lat / 2.5) * 2.5
Lon = round(Lon / 5) * 5
print(Lat, Lon)


KeyboardInterrupt: Interrupted by user

In [1]:
# To search for desired lat and long and separate it as a file.

import pandas as pd

# Load the CSV file
file_path = r'C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\PROJECT\surge\Bijoy_work\Taiwan_2024\csv files\1_initial_dataset.csv'
data = pd.read_csv(file_path)
data

# Take input for the desired latitude and longitude
Lat = float(input("Enter the desired latitude: "))
Lon = float(input("Enter the desired longitude: "))
Lat = round(Lat / 2.5) * 2.5
Lon = round(Lon / 5) * 5


# Filter the rows that match the desired latitude and longitude
filtered_data = data[(data['Latitude'] == Lat) & (data['Longitude'] == Lon)]

# Check if there are any matching rows
if filtered_data.empty:
    print("No matching rows found.")
else:
    # Save the filtered rows to a new CSV file
    output_path = r'C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\PROJECT\surge\Bijoy_work\Taiwan_2024\csv files\SEPARATED_WITH_LAT_AND_LONG.csv'
    filtered_data.to_csv(output_path, index=False)
    print("Filtered rows have been saved to CSV.")
    print(f"Number of matching rows: {len(filtered_data)}")
    print("New CSV file saved to:", output_path)


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\gauta\\OneDrive - IIT Kanpur\\Desktop\\PROJECT\\surge\\Bijoy_work\\Taiwan_2024\\csv files\\1_initial_dataset.csv'

In [3]:
# Program for dates repeating

import pandas as pd

# Load the CSV file
file_path = r'C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\PROJECT\surge\Bijoy_work\Taiwan_2024\csv files\6_filtered_lat_lon.csv'
df = pd.read_csv(file_path)

# Remove the first two columns (latitude and longitude)
df = df.iloc[:, 2:]

# Extract only the first 37 dates from the header (first row) after removing latitude and longitude
dates = list(df.columns[:37])

# Initialize an empty list to store repeated dates
repeated_dates = []

# Repeat each date 24 times sequentially
for date in dates:
    repeated_dates.extend([date] * 24)

# Initialize a new DataFrame for flattened data including dates
flattened_df = pd.DataFrame({'Date': repeated_dates})

# Flatten each set of 37 columns into new columns
num_cols = df.shape[1]
num_sets = num_cols // 37

for i in range(num_sets):
    start_col = i * 37
    end_col = (i + 1) * 37
    subset = df.iloc[:, start_col:end_col].stack().reset_index(drop=True)
    if i == 0:
        flattened_df['TEC'] = subset
    elif i == 1:
        flattened_df['UB'] = subset
    elif i == 2:
        flattened_df['LB'] = subset

# Save the flattened DataFrame to a new CSV file
output_path = r'C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\PROJECT\surge\Bijoy_work\Taiwan_2024\csv files\output_with_repeated_dates.csv'
flattened_df.to_csv(output_path, index=False)

print("DataFrame has been flattened into three columns with each date repeated 24 times sequentially in the 'Date' column.")
print("CSV file saved to:", output_path)


In [2]:
# program for dates not repeating leaving 23 blankspaces.
import pandas as pd
import numpy as np

# Load the CSV file
file_path = r'C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\PROJECT\surge\Bijoy_work\Taiwan_2024\csv files\6_filtered_lat_lon.csv'
df = pd.read_csv(file_path)

# Remove the first two columns (latitude and longitude)
df = df.iloc[:, 2:]

# Extract only the first 37 dates from the header (first row) after removing latitude and longitude
dates = list(df.columns[:37])

# Initialize lists to hold the 'Date' column and NaN values
Date = []
NaNs = [np.nan] * 24  # List of 24 NaN values

# Construct the 'Date' column with NaNs in between
for i in range(24 * 37):
    if i % 24 == 0:
        Date.append(dates[i // 24])  # Append the date every 24 rows
    else:
        Date.append(np.nan)  # Append NaN for other rows

# Create a new DataFrame with 'Date', 'TEC', 'UB', 'LB' columns
flattened_df = pd.DataFrame({
    'Date': Date,
    'TEC': np.nan,
    'UB': np.nan,
    'LB': np.nan
})

# Flatten each set of 37 columns into new columns
num_cols = df.shape[1]
num_sets = num_cols // 37

for i in range(num_sets):
    start_col = i * 37
    end_col = (i + 1) * 37
    subset = df.iloc[:, start_col:end_col].values.flatten()
    column_name = f'Column_{i + 1}'
    if i == 0:
        flattened_df['TEC'] = subset
    elif i == 1:
        flattened_df['UB'] = subset
    elif i == 2:
        flattened_df['LB'] = subset

# Save the flattened DataFrame to a new CSV file
output_path = r'C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\PROJECT\surge\Bijoy_work\Taiwan_2024\csv files\WRONG_OUTPUT.csv'
flattened_df.to_csv(output_path, index=False)

print("DataFrame has been flattened into three columns (TEC, UB, LB) with each date in the 'Date' column, followed by 23 NaNs.")
print("CSV file saved to:", output_path)


DataFrame has been flattened into three columns (TEC, UB, LB) with each date in the 'Date' column, followed by 23 NaNs.
CSV file saved to: C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\PROJECT\surge\Bijoy_work\Taiwan_2024\csv files\WRONG_OUTPUT.csv


In [6]:
import pandas as pd
import numpy as np

# Load the CSV file
file_path = r'C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\PROJECT\surge\Bijoy_work\Taiwan_2024\csv files\7_52_days_filtered_lat_lon_data.csv'
df = pd.read_csv(file_path)

# Remove the first two columns (latitude and longitude)
df = df.iloc[:, 2:]

# Number of columns in the original DataFrame after removing latitude and longitude columns
num_cols = df.shape[1]

# Ensure that we have exactly 111 columns (37 for each of TEC, UB, and LB)
if num_cols != 111:
    raise ValueError(f"Expected 111 columns (37 each for TEC, UB, and LB), but found {num_cols} columns.")

# Flatten each set of 37 columns into separate lists
TEC = df.iloc[:, :37].values.flatten()
UB = df.iloc[:, 37:74].values.flatten()
LB = df.iloc[:, 74:111].values.flatten()

# Repeat the first 37 dates to match the length of the flattened columns
dates = np.tile(df.columns[:37], df.shape[0])

# Create a new DataFrame with the flattened columns
flattened_df = pd.DataFrame({
    'Date': np.concatenate([dates, dates, dates]),
    'TEC': TEC,
    'UB': UB,
    'LB': LB
})

# Save the flattened DataFrame to a new CSV file
output_path = r'C:\Users\gauta\OneDrive - IIT Kanpur\Desktop\PROJECT\surge\Bijoy_work\Taiwan_2024\csv files\UPDATED_OUTPUT.csv'
flattened_df.to_csv(output_path, index=False)

print("DataFrame has been flattened into three columns (TEC, UB, LB) with each date in the 'Date' column.")
print("CSV file saved to:", output_path)


ValueError: All arrays must be of the same length