In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 

# Apply matplotlib's "ggplot" style sheet to the figures created in this notebook.
plt.style.use("ggplot")

In [None]:
# Read the project workbook and keep a second, independent copy (df_raw)
# alongside the working frame (df); then preview the first rows.
DATASET_PATH = "datafiles/Insy6500_Project_Dataset_Updated.xlsx"

df = pd.read_excel(DATASET_PATH)
df_raw = df.copy()
df.head()


In [None]:
# Ensure timestamps are always parsed correctly
def fix_timestamp(df):
    """Return a copy of ``df`` whose 'timestamp' column is parsed to datetimes.

    The column is parsed with the format "%m/%d/%Y %H:%M:%S:%f" (note the
    colon before the fractional seconds). The caller's frame is not modified,
    so re-running a cell that calls this helper cannot silently change a frame
    that other cells still reference.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'timestamp' column.

    Returns
    -------
    pandas.DataFrame
        A new frame, same columns and order, with 'timestamp' converted by
        ``pd.to_datetime``.

    Raises
    ------
    KeyError
        If ``df`` has no 'timestamp' column.
    ValueError
        If pandas cannot parse a value with the format above.
    """
    parsed = pd.to_datetime(df['timestamp'], format="%m/%d/%Y %H:%M:%S:%f")
    # assign() builds a new DataFrame instead of writing into the argument.
    return df.assign(timestamp=parsed)

# Parse the 'timestamp' column on both frames: the raw copy first, then the working frame.
df_raw, df = fix_timestamp(df_raw), fix_timestamp(df)


In [None]:
# Print the working frame's column dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics of the working frame (pandas' default describe() selection).
df.describe()

In [None]:
# Number of missing values in each column of the working frame.
df.isna().sum()

In [None]:
# 'timestamp' was already converted to datetimes by fix_timestamp() in an
# earlier cell, so it is not parsed a second time here.

# Elapsed time in fractional hours, measured from the earliest timestamp.
start_time = df['timestamp'].min()
df['hours_since_start'] = (df['timestamp'] - start_time) / pd.Timedelta(hours=1)
df[['timestamp','hours_since_start']].head()


In [None]:
# Count negative entries per numeric column.
# select_dtypes() is the public way to pick numeric columns; the previously
# used DataFrame._get_numeric_data() is a private pandas method.
(df.select_dtypes(include="number") < 0).sum()


In [None]:
# Summary statistics for the four R1 columns.
r1_columns = ['Resistor R1 40C', 'Resistor R1 60C', 'Resistor R1 85C', 'Resistor R1 125C']
df.loc[:, r1_columns].describe()

In [None]:
# Summary statistics for the four gain columns.
gain_columns = [
    'Gain (Vout/Vin) 40C',
    'Gain (Vout/Vin) 60C',
    'Gain (Vout/Vin) 85C',
    'Gain (Vout/Vin) 125C',
]
df.loc[:, gain_columns].describe()

In [None]:
# Distinct values present in the raw 125C gain column.
gain_125c_raw = df_raw['Gain (Vout/Vin) 125C']
gain_125c_raw.unique()


In [None]:
# Smallest raw reading in each R1 column.
r1_raw = df_raw[['Resistor R1 40C', 'Resistor R1 60C', 'Resistor R1 85C', 'Resistor R1 125C']]
r1_raw.min()


In [None]:
# Smallest raw reading in each R2 column.
r2_raw = df_raw[['Resistor R2 85C', 'Resistor R2 125C']]
r2_raw.min()


In [None]:
# Rebuild the working frame from df_raw, so this cell always starts from the
# same input regardless of what earlier cells added to `df`.
df = df_raw.copy()

gain_cols = [
    'Gain (Vout/Vin) 40C',
    'Gain (Vout/Vin) 60C',
    'Gain (Vout/Vin) 85C',
    'Gain (Vout/Vin) 125C',
]

res_cols = [
    'Resistor R1 40C',
    'Resistor R1 60C',
    'Resistor R1 85C',
    'Resistor R1 125C',
    'Resistor R2 85C',
    'Resistor R2 125C',
]

# Step 1: negative gains become NaN (where() fills with NaN by default).
for col in gain_cols:
    gain = df[col]
    df[col] = gain.where(gain >= 0)

# Step 2: non-positive resistances become NaN. This runs for every resistor
# column before step 3, so the quartiles there see positive readings only.
for col in res_cols:
    resistance = df[col]
    df[col] = resistance.where(resistance > 0)

# Step 3: resistances outside [Q1 - 3*IQR, Q3 + 3*IQR] become NaN.
for col in res_cols:
    resistance = df[col]
    Q1, Q3 = resistance.quantile(0.25), resistance.quantile(0.75)
    IQR = Q3 - Q1
    lower, upper = Q1 - 3 * IQR, Q3 + 3 * IQR
    inside_fence = (resistance >= lower) & (resistance <= upper)
    df[col] = resistance.where(inside_fence)


In [None]:
# Missing values per column after cleaning (values masked above show up here as NaN).
df.isna().sum()


In [None]:
# Drift of each R1 column relative to its first valid (non-NaN) reading.
# The cleaning cell can turn the very first row into NaN; subtracting that NaN
# would make the whole "_delta" column NaN. bfill() pulls the first valid
# reading up to position 0 (and leaves NaN only if the column is entirely NaN).
for col in ['Resistor R1 40C','Resistor R1 60C','Resistor R1 85C','Resistor R1 125C']:
    baseline = df[col].bfill().iloc[0]
    df[col + "_delta"] = df[col] - baseline

df.head()


In [None]:
# Line plot of every R1 column against the timestamp.
fig, ax = plt.subplots(figsize=(12,6))

r1_series = [
    ('Resistor R1 40C', 'R1 40°C'),
    ('Resistor R1 60C', 'R1 60°C'),
    ('Resistor R1 85C', 'R1 85°C'),
    ('Resistor R1 125C', 'R1 125°C'),
]
for column, label in r1_series:
    ax.plot(df['timestamp'], df[column], label=label)

ax.legend()
ax.set_xlabel("Time")
ax.set_ylabel("Resistance (Ohms)")
ax.set_title("Resistor R1 Over Time")
plt.show()


In [None]:
# `df` was rebuilt from df_raw in the cleaning cell, which discarded the
# 'hours_since_start' column, so it is recomputed here. 'timestamp' is already
# a datetime column (parsed by fix_timestamp), so it is not parsed again.
start_time = df['timestamp'].min()
df['hours_since_start'] = (df['timestamp'] - start_time) / pd.Timedelta(hours=1)


In [None]:
# Scatter of every R1 column against elapsed hours.
fig, ax = plt.subplots(figsize=(12,6))

elapsed_hours = df['hours_since_start']
for col in ('Resistor R1 40C', 'Resistor R1 60C', 'Resistor R1 85C', 'Resistor R1 125C'):
    ax.scatter(elapsed_hours, df[col], s=10, alpha=0.5, label=col)

ax.legend()
ax.set_xlabel("Hours Since Start")
ax.set_ylabel("Resistance (Ohms)")
ax.set_title("Scatter Plot of R1 Drift Over Time")
plt.show()


In [None]:
# Line plot of both R2 columns against the timestamp.
fig, ax = plt.subplots(figsize=(12,6))

r2_series = [
    ('Resistor R2 85C', 'R2 85°C'),
    ('Resistor R2 125C', 'R2 125°C'),
]
for column, label in r2_series:
    ax.plot(df['timestamp'], df[column], label=label)

ax.legend()
ax.set_xlabel("Time")
ax.set_ylabel("Resistance (Ohms)")
ax.set_title("Resistor R2 Over Time")
plt.show()


In [None]:
# Scatter of both R2 columns against elapsed hours.
fig, ax = plt.subplots(figsize=(12,6))

elapsed_hours = df['hours_since_start']
for col in ('Resistor R2 85C', 'Resistor R2 125C'):
    ax.scatter(elapsed_hours, df[col], s=10, alpha=0.5, label=col)

ax.legend()
ax.set_xlabel("Hours Since Start")
ax.set_ylabel("Resistance (Ohms)")
ax.set_title("Scatter Plot of R2 Drift Over Time")
plt.show()


In [None]:
# Gain at 40/60/85C against the timestamp (the 125C column is plotted in its own cell).
fig, ax = plt.subplots(figsize=(12,6))

gain_series = [
    ('Gain (Vout/Vin) 40C', 'Gain 40°C'),
    ('Gain (Vout/Vin) 60C', 'Gain 60°C'),
    ('Gain (Vout/Vin) 85C', 'Gain 85°C'),
]
for column, label in gain_series:
    ax.plot(df['timestamp'], df[column], label=label)

ax.legend()
ax.set_xlabel("Time")
ax.set_ylabel("Gain")
ax.set_title("Gain Over Time Across Temperatures")
plt.show()


In [None]:
# Gain at 125C against the timestamp, on its own figure.
# Title fixed: this figure shows a single temperature, so "Across 125°C"
# (copied from the multi-temperature plot) was reworded to "at 125°C".
plt.figure(figsize=(12,6))
plt.plot(df['timestamp'], df['Gain (Vout/Vin) 125C'], label='Gain 125°C')
plt.legend()
plt.xlabel("Time")
plt.ylabel("Gain")
plt.title("Gain Over Time at 125°C")
plt.show()

In [None]:
# One histogram (with KDE overlay) per R1 column, each on its own figure.
temps = [
    'Resistor R1 40C',
    'Resistor R1 60C',
    'Resistor R1 85C',
    'Resistor R1 125C',
]

for col in temps:
    fig, ax = plt.subplots(figsize=(10,5))
    sns.histplot(df[col], kde=True, ax=ax)
    ax.set_title(f"Distribution of {col}")
    ax.set_xlabel("Resistance (Ohms)")
    ax.set_ylabel("Count")
    plt.show()


In [None]:
# One histogram (with KDE overlay) per R2 column, each on its own figure.
temps_r2 = [
    'Resistor R2 85C',
    'Resistor R2 125C',
]

for col in temps_r2:
    fig, ax = plt.subplots(figsize=(10,5))
    sns.histplot(df[col], kde=True, ax=ax)
    ax.set_title(f"Distribution of {col}")
    ax.set_xlabel("Resistance (Ohms)")
    ax.set_ylabel("Count")
    plt.show()


In [None]:
# 2x2 grid of gain histograms (with KDE overlay), one panel per temperature.
gain_cols = [
    'Gain (Vout/Vin) 40C',
    'Gain (Vout/Vin) 60C',
    'Gain (Vout/Vin) 85C',
    'Gain (Vout/Vin) 125C'
]

fig, axes = plt.subplots(2, 2, figsize=(12,10))

# axes.flat walks the grid row by row, matching subplot positions 1..4.
for ax, col in zip(axes.flat, gain_cols):
    sns.histplot(df[col], kde=True, ax=ax)
    ax.set_title(f"Distribution of {col}")
    ax.set_xlabel("Gain")
    ax.set_ylabel("Count")

fig.tight_layout()
plt.show()


In [None]:
# Box plot comparing the four R1 columns side by side.
r1_box_cols = ['Resistor R1 40C', 'Resistor R1 60C', 'Resistor R1 85C', 'Resistor R1 125C']

fig, ax = plt.subplots(figsize=(12,6))
sns.boxplot(data=df[r1_box_cols], ax=ax)
ax.set_title("Resistor R1 Values Across Temperatures")
ax.set_ylabel("Resistance (Ohms)")
plt.show()


In [None]:
# Box plot comparing the two R2 columns side by side.
r2_box_cols = ['Resistor R2 85C', 'Resistor R2 125C']

fig, ax = plt.subplots(figsize=(12,6))
sns.boxplot(data=df[r2_box_cols], ax=ax)
ax.set_title("Resistor R2 Values Across Temperatures")
ax.set_ylabel("Resistance (Ohms)")
plt.show()


In [None]:
# Box plot of the gain at 40/60/85C (the 125C column gets its own figure).
gain_box_cols = ['Gain (Vout/Vin) 40C', 'Gain (Vout/Vin) 60C', 'Gain (Vout/Vin) 85C']

fig, ax = plt.subplots(figsize=(12,6))
sns.boxplot(data=df[gain_box_cols], ax=ax)
ax.set_title("Gain (Vout/Vin) Distribution Across Temperatures")
ax.set_ylabel("Gain")
plt.show()


In [None]:
# Box plot of the 125C gain column on its own figure.
gain_125_frame = df[['Gain (Vout/Vin) 125C']]

fig, ax = plt.subplots(figsize=(12,6))
sns.boxplot(data=gain_125_frame, ax=ax)
ax.set_title("Gain (Vout/Vin) Distribution At 125C")
ax.set_ylabel("Gain")
plt.show()

In [None]:
# Correlate only the measured resistor columns. The derived "<column>_delta"
# columns also contain "Resistor R1"/"Resistor R2" in their names; each one is
# its source column minus a constant, so including them would add duplicate
# rows/columns and make the heatmap depend on which delta cells have already run.
res_cols = [
    c for c in df.columns
    if ("Resistor R1" in c or "Resistor R2" in c) and not c.endswith("_delta")
]
plt.figure(figsize=(8,6))
sns.heatmap(df[res_cols].corr(), annot=True, cmap="coolwarm")
plt.title("Resistor Correlation Heatmap")
plt.show()



In [None]:
# Fitted linear trend (regression line only, no scatter points) of each R1 delta column.
r1_delta_cols = [
    'Resistor R1 40C_delta',
    'Resistor R1 60C_delta',
    'Resistor R1 85C_delta',
    'Resistor R1 125C_delta',
]

fig, ax = plt.subplots(figsize=(12,6))
for col in r1_delta_cols:
    sns.regplot(data=df, x='hours_since_start', y=col, scatter=False, label=col, ax=ax)

ax.legend()
ax.set_xlabel("Hours Since Start")
ax.set_ylabel("ΔR (Change from Initial Value)")
ax.set_title("Linear Trend of Drift for R1 at All Temperatures")
plt.show()


In [None]:
# Drift of each R2 column relative to its first valid (non-NaN) reading.
# After cleaning, the first row may be NaN; subtracting it would make the whole
# "_delta" column NaN, so bfill() is used to find the first valid reading.
for col in ['Resistor R2 85C', 'Resistor R2 125C']:
    baseline = df[col].bfill().iloc[0]
    df[col + "_delta"] = df[col] - baseline


In [None]:
# Fitted linear trend (regression line only, no scatter points) of each R2 delta column.
r2_delta_cols = ['Resistor R2 85C_delta', 'Resistor R2 125C_delta']

fig, ax = plt.subplots(figsize=(12,6))
for col in r2_delta_cols:
    sns.regplot(data=df, x='hours_since_start', y=col, scatter=False, label=col, ax=ax)

ax.legend()
ax.set_xlabel("Hours Since Start")
ax.set_ylabel("ΔR (Change from Initial Value)")
ax.set_title("Linear Trend of Drift for R2 at All Temperatures")
plt.show()


In [None]:
# Drift of each gain column relative to its first valid (non-NaN) reading.
# The cleaning cell masks negative gains as NaN; if that hits the first row,
# subtracting it would make the whole "_delta" column NaN, so bfill() is used
# to find the first valid reading.
for col in ['Gain (Vout/Vin) 40C','Gain (Vout/Vin) 60C','Gain (Vout/Vin) 85C','Gain (Vout/Vin) 125C']:
    baseline = df[col].bfill().iloc[0]
    df[col + "_delta"] = df[col] - baseline


In [None]:
# Fitted linear trend (regression line only, no scatter points) of each gain delta column.
gain_delta_cols = [
    'Gain (Vout/Vin) 40C_delta',
    'Gain (Vout/Vin) 60C_delta',
    'Gain (Vout/Vin) 85C_delta',
    'Gain (Vout/Vin) 125C_delta'
]

fig, ax = plt.subplots(figsize=(12,6))
for col in gain_delta_cols:
    sns.regplot(data=df, x='hours_since_start', y=col, scatter=False, label=col, ax=ax)

ax.legend()
ax.set_xlabel("Hours Since Start")
ax.set_ylabel("ΔGain (Change from Initial Value)")
ax.set_title("Linear Drift Trend of Gain Across Temperatures")
plt.show()


In [None]:
# Slope of a degree-1 least-squares fit of each R1 delta column against elapsed
# hours, keyed by the delta column name. Rows with NaN in either column are dropped.
r1_delta_cols = [
    'Resistor R1 40C_delta',
    'Resistor R1 60C_delta',
    'Resistor R1 85C_delta',
    'Resistor R1 125C_delta',
]

drift_rates = {}
for col in r1_delta_cols:
    temp_df = df[['hours_since_start', col]].dropna()
    # polyfit returns coefficients highest power first: [slope, intercept].
    slope, intercept = np.polyfit(temp_df['hours_since_start'], temp_df[col], 1)
    drift_rates[col] = slope

drift_rates


In [None]:
# Plot the fitted R1 drift rate against test temperature.
temps = [40, 60, 85, 125]
# Look each rate up by its column name instead of relying on the insertion
# order of drift_rates, so every temperature is paired with its own slope
# (a missing key fails loudly with KeyError rather than mis-pairing silently).
rates = [drift_rates[f'Resistor R1 {t}C_delta'] for t in temps]

plt.figure(figsize=(8,6))
plt.scatter(temps, rates)
plt.plot(temps, rates)
plt.title("Drift Rate vs Temperature")
plt.xlabel("Temperature (C)")
plt.ylabel("Drift Rate (Ohm per hour)")
plt.show()


In [None]:
# For each R1 column, overlay the cleaned series on the raw series.
temps = [
    'Resistor R1 40C',
    'Resistor R1 60C',
    'Resistor R1 85C',
    'Resistor R1 125C',
]

for col in temps:
    fig, ax = plt.subplots(figsize=(12,6))
    ax.plot(df['timestamp'], df[col], label="Cleaned", alpha=0.9)
    ax.plot(df_raw['timestamp'], df_raw[col], label="Raw", linestyle='--', alpha=0.5)
    ax.legend()
    ax.set_title(f"Effect of Cleaning on {col}")
    ax.set_xlabel("Time")
    ax.set_ylabel("Resistance (Ohms)")
    plt.show()


In [None]:
# For each R2 column, overlay the cleaned series on the raw series.
# Both frames already hold datetime timestamps (df_raw was parsed by
# fix_timestamp and df was copied from it), so neither is parsed again here.
temps_r2 = ['Resistor R2 85C', 'Resistor R2 125C']

for col in temps_r2:
    plt.figure(figsize=(12,6))
    plt.plot(df['timestamp'], df[col], label="Cleaned", alpha=0.9)
    plt.plot(df_raw['timestamp'], df_raw[col], label="Raw", linestyle='--', alpha=0.5)
    plt.legend()
    plt.title(f"Effect of Cleaning on {col}")
    plt.xlabel("Time")
    plt.ylabel("Resistance (Ohms)")
    plt.show()


In [None]:
# For each gain column, overlay the cleaned series on the raw series.
gain_cols = [
    'Gain (Vout/Vin) 40C',
    'Gain (Vout/Vin) 60C',
    'Gain (Vout/Vin) 85C',
    'Gain (Vout/Vin) 125C'
]

for col in gain_cols:
    fig, ax = plt.subplots(figsize=(12,6))
    ax.plot(df['timestamp'], df[col], label="Cleaned", alpha=0.9)
    ax.plot(df_raw['timestamp'], df_raw[col], label="Raw", linestyle='--', alpha=0.5)
    ax.legend()
    ax.set_title(f"Effect of Cleaning on {col}")
    ax.set_xlabel("Time")
    ax.set_ylabel("Gain (Vout/Vin)")
    plt.show()