In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 

# Apply matplotlib's "ggplot" stylesheet to every figure drawn in this notebook.
plt.style.use("ggplot")

In [None]:
# Read the workbook once. df_raw is the untouched snapshot; df is the working
# frame that the following cells modify.
df_raw = pd.read_excel("datafiles/Insy6500_Project_Dataset_Updated.xlsx")
df = df_raw.copy()
df.head()


In [None]:
# Column names, dtypes and non-null counts of the working frame.
df.info()

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Parse the timestamp column with an explicit format; the field after the last
# colon is the fractional-second part.
parsed_time = pd.to_datetime(df['timestamp'], format="%m/%d/%Y %H:%M:%S:%f")
df['timestamp'] = parsed_time

# Elapsed time since the earliest sample, expressed in (fractional) hours.
elapsed = parsed_time - parsed_time.min()
df['hours_since_start'] = elapsed / pd.Timedelta(hours=1)

df.loc[:, ['timestamp', 'hours_since_start']].head()


In [None]:
# Count negative readings per numeric column.
# select_dtypes is the public API; the private DataFrame._get_numeric_data
# helper used previously is not part of pandas' documented interface.
# (Boolean columns are no longer included, but they can never be negative.)
(df.select_dtypes(include="number") < 0).sum()


In [None]:
# Summary statistics for the R1 column at each of the four temperatures.
r1_columns = [f"Resistor R1 {temp}C" for temp in (40, 60, 85, 125)]
df[r1_columns].describe()

In [None]:
# Summary statistics for the gain column at each of the four temperatures.
gain_columns = [f"Gain (Vout/Vin) {temp}C" for temp in (40, 60, 85, 125)]
df[gain_columns].describe()

In [None]:
# Row-level cleaning. All rules are combined into one boolean mask and applied
# once; the explicit .copy() makes df an independent frame, so the column
# assignments in later cells do not trigger SettingWithCopyWarning.

# Rule 1: keep rows whose gain is strictly positive at every temperature.
gain_cols = ['Gain (Vout/Vin) 40C','Gain (Vout/Vin) 60C','Gain (Vout/Vin) 85C','Gain (Vout/Vin) 125C']

# Rule 2: keep rows whose R1 reading is above 50 on the 60C and 85C columns.
# NOTE(review): this drops low readings (<= 50), not high spikes, and only
# screens two of the four R1 columns -- confirm the threshold and coverage.
low_reading_cols = ['Resistor R1 60C','Resistor R1 85C']

# Comparisons against NaN are False, so rows with missing values in these
# columns are dropped as well (same as the sequential filters this replaces).
keep = (df[gain_cols] > 0).all(axis=1) & (df[low_reading_cols] > 50).all(axis=1)
df = df.loc[keep].copy()

df.shape


In [None]:
# Add a "<column>_delta" column for each R1 column: the reading minus the first
# row's reading of that column in the current frame.
for temp in (40, 60, 85, 125):
    channel = f"Resistor R1 {temp}C"
    baseline = df[channel].iloc[0]
    df[f"{channel}_delta"] = df[channel] - baseline

df.head()


In [None]:
# Line plot of the four R1 columns against the parsed timestamp.
plt.figure(figsize=(12,6))
for temp in (40, 60, 85, 125):
    plt.plot(df['timestamp'], df[f'Resistor R1 {temp}C'], label=f'R1 {temp}°C')
plt.title("Resistor R1 Over Time")
plt.xlabel("Time")
plt.ylabel("Resistance (Ohms)")
plt.legend()
plt.show()


In [None]:
# Scatter of each R1 column against elapsed hours; small translucent markers
# keep overlapping points readable.
plt.figure(figsize=(12,6))

marker_style = dict(s=10, alpha=0.5)
elapsed_hours = df['hours_since_start']
for temp in (40, 60, 85, 125):
    channel = f'Resistor R1 {temp}C'
    plt.scatter(elapsed_hours, df[channel], label=channel, **marker_style)

plt.title("Scatter Plot of R1 Drift Over Time")
plt.xlabel("Hours Since Start")
plt.ylabel("Resistance (Ohms)")
plt.legend()
plt.show()


In [None]:
# Line plot of the four gain columns against the parsed timestamp.
plt.figure(figsize=(12,6))
for temp in (40, 60, 85, 125):
    plt.plot(df['timestamp'], df[f'Gain (Vout/Vin) {temp}C'], label=f'Gain {temp}°C')
plt.title("Gain Over Time Across Temperatures")
plt.xlabel("Time")
plt.ylabel("Gain")
plt.legend()
plt.show()


In [None]:
# Histogram of the R1 40C column with a kernel-density overlay.
sns.histplot(data=df, x='Resistor R1 40C', kde=True)
plt.title("Distribution of R1 at 40°C")
plt.show()


In [None]:
# One box per R1 column, drawn side by side.
plt.figure(figsize=(12,6))

r1_box_columns = [f'Resistor R1 {temp}C' for temp in (40, 60, 85, 125)]
sns.boxplot(data=df[r1_box_columns])

plt.ylabel("Resistance (Ohms)")
plt.title("Resistor R1 Values Across Temperatures")
plt.show()


In [None]:
# Correlate only the measured resistor columns. Derived columns created by
# other cells (e.g. "Resistor R1 40C_delta", "..._pct_change") also contain
# "Resistor R1" in their names; each differs from its source column only by a
# constant offset or scale, so including them would just add duplicate rows
# and columns of perfect correlation to the heatmap.
derived_suffixes = ("_delta", "_pct_change")
res_cols = [
    c for c in df.columns
    if ("Resistor R1" in c or "Resistor R2" in c) and not c.endswith(derived_suffixes)
]

plt.figure(figsize=(8,6))
sns.heatmap(df[res_cols].corr(), annot=True, cmap="coolwarm")
plt.title("Resistor Correlation Heatmap")
plt.show()



In [None]:
# Fitted linear trend (no scatter points) of each R1 delta column versus
# elapsed hours, all on one set of axes.
plt.figure(figsize=(12,6))

hours = df['hours_since_start']
for temp in (40, 60, 85, 125):
    delta_col = f'Resistor R1 {temp}C_delta'
    sns.regplot(x=hours, y=df[delta_col], scatter=False, label=delta_col)

plt.legend()
plt.title("Linear Trend of Drift for R1 at All Temperatures")
plt.xlabel("Hours Since Start")
plt.ylabel("ΔR (Change from Initial Value)")
plt.show()


In [None]:
# Slope of a degree-1 least-squares fit of each R1 delta column against
# elapsed hours, keyed by column name (insertion order: 40, 60, 85, 125).
delta_columns = [f'Resistor R1 {temp}C_delta' for temp in (40, 60, 85, 125)]

drift_rates = {
    name: np.polyfit(df['hours_since_start'], df[name], 1)[0]
    for name in delta_columns
}

drift_rates


In [None]:
# 50-row rolling mean of the R1 40C column; the first 49 rows are NaN because
# the window is not yet full.
smoothed_r1_40 = df['Resistor R1 40C'].rolling(window=50).mean()
df['R1_40C_roll'] = smoothed_r1_40

plt.figure(figsize=(12,6))
plt.plot(df['timestamp'], smoothed_r1_40)
plt.title("Smoothed Trend for R1 @ 40C")
plt.show()


In [None]:
# Overlaid kernel-density estimates of the four R1 columns.
plt.figure(figsize=(12,6))
for temp in (40, 60, 85, 125):
    sns.kdeplot(df[f'Resistor R1 {temp}C'], label=f"{temp}°C")
plt.legend()
plt.title("R1 Distributions Across Temperatures")
plt.show()


In [None]:
# Inspect the low R1 60C readings. These must be taken from df_raw: the
# cleaning cell already removed every row with 'Resistor R1 60C' <= 50 from df,
# so filtering df here would always produce an empty frame.
outliers = df_raw[df_raw['Resistor R1 60C'] < 50]
outliers.head()


In [None]:
# Add a "<column>_pct_change" column for each R1 column: percentage change
# relative to the first row's reading of that column in the current frame.
for temp in (40, 60, 85, 125):
    channel = f'Resistor R1 {temp}C'
    first_value = df[channel].iloc[0]
    df[f'{channel}_pct_change'] = (df[channel] - first_value) / first_value * 100


In [None]:
# Residuals of a linear fit of the R1 85C delta column on elapsed hours.
sns.residplot(data=df, x='hours_since_start', y='Resistor R1 85C_delta')
plt.title("Residuals for R1 85°C Drift")
plt.show()


In [None]:
# Fitted drift slopes against temperature. `temps` is listed in the same order
# in which drift_rates was populated, so the two sequences line up by position.
temps = [40, 60, 85, 125]
rates = [*drift_rates.values()]

plt.figure(figsize=(8,6))
plt.scatter(temps, rates)   # markers at each temperature
plt.plot(temps, rates)      # connecting line
plt.xlabel("Temperature (C)")
plt.ylabel("Drift Rate (Ohm per hour)")
plt.title("Drift Rate vs Temperature")
plt.show()


In [None]:
# Overlay the cleaned and raw 60C gain series on one time axis.
# df_raw was copied before the timestamp column was parsed, so its timestamps
# are converted here with the same format used for df. Without this the raw
# series would be plotted against unparsed values while the cleaned series uses
# datetimes, and the two could not share an x-axis.
raw_time = pd.to_datetime(df_raw['timestamp'], format="%m/%d/%Y %H:%M:%S:%f")

plt.figure(figsize=(12,6))
plt.plot(df['timestamp'], df['Gain (Vout/Vin) 60C'], label="Cleaned", alpha=0.9)
plt.plot(raw_time, df_raw['Gain (Vout/Vin) 60C'], label="Raw", linestyle='--', alpha=0.6)
plt.legend()
plt.xlabel("Time")
plt.ylabel("Gain")
plt.title("Effect of Cleaning on Gain (Vout/Vin) 60°C")
plt.show()


In [None]:
# Overlay the cleaned and raw R1 85C series on one time axis.
# df_raw was copied before the timestamp column was parsed, so its timestamps
# are converted here with the same format used for df. Without this the raw
# series would be plotted against unparsed values while the cleaned series uses
# datetimes, and the two could not share an x-axis.
raw_time = pd.to_datetime(df_raw['timestamp'], format="%m/%d/%Y %H:%M:%S:%f")

plt.figure(figsize=(12,6))
plt.plot(df['timestamp'], df['Resistor R1 85C'], label="Cleaned")
plt.plot(raw_time, df_raw['Resistor R1 85C'], label="Raw", linestyle='--', alpha=0.5)
plt.legend()
plt.xlabel("Time")
plt.ylabel("Resistance (Ohms)")
plt.title("Effect of Cleaning on R1 85°C")
plt.show()
