In [42]:
import pandas as pd
import numpy as np

# Load the Excel files
visakhapatnam_df = pd.read_excel('VISAKHAPATNAM_P2017.xlsx', header=0)
seventeen_df = pd.read_excel('17_17.xlsx', header=0)

# Assuming 'Date' column might have different naming or formatting issues, 
# we can inspect and rename the columns if necessary
seventeen_df.columns = seventeen_df.columns.str.strip()  # Remove any leading/trailing whitespace

# Convert values in columns D, F, H, J from meters to centimeters
for col in ['D', 'F', 'H', 'J']:
    visakhapatnam_df[col] = visakhapatnam_df[col] * 100

# Function to convert time values
def convert_time(value):
    if pd.isna(value):
        return None
    value = str(int(value)).zfill(4)
    hours = int(value[:2])
    minutes = int(value[2:])
    return hours + minutes / 60

# Apply the time conversion function to columns C, E, G, I
for col in ['C', 'E', 'G', 'I']:
    visakhapatnam_df[col] = visakhapatnam_df[col].apply(convert_time)

# Interpolation function
def interpolate(hour, minute, row):
    hour_val = row[str(hour+1)]
    next_hour_val = row[str(hour + 2)]
    interpolated_val = hour_val + (next_hour_val - hour_val) * (minute / 60)
    return interpolated_val

# Ensure there is a 'Date' column in visakhapatnam_df
if 'Date' not in visakhapatnam_df.columns:
    visakhapatnam_df['Date'] = pd.date_range(start='1/1/2017', periods=len(visakhapatnam_df), freq='D')

# List to store differences
differences = []

# Perform interpolation and calculate differences
for index, row in visakhapatnam_df.iterrows():
    date_row = seventeen_df[seventeen_df['Date'] == row['Date']]
    if date_row.empty:
        continue
    date_row = date_row.iloc[0]
    for col, meter_col in zip(['C', 'E', 'G'], ['D', 'F', 'H']):
        time_value = row[col]
        if time_value is None:
            continue
        hour = int(time_value)
        minute = int((time_value - hour) * 60)
        interpolated_val = interpolate(hour, minute, date_row)
        original_val = row[meter_col]
        differences.append(original_val - interpolated_val)

# Calculate the standard deviation
std_deviation = np.std(differences)

print("Standard Deviation:", std_deviation)


Standard Deviation: 8.800457781451582
