1. Loading the CSV File into a DataFrame

In [1]:
import pandas as pd
from google.colab import drive
# Mount Google Drive at /content/drive so the CSV under MyDrive can be read below.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Location of the raw CSV export on the mounted Google Drive.
file_path = '/content/drive/MyDrive/01.Data Cleaning and Preprocessing - 01.Data Cleaning and Preprocessing.csv'

# Parse the CSV into a DataFrame; the cells below all read from `df`.
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the top five rows as a sanity check on the load.
print(df.head(n=5))

  Observation  Y-Kappa  ChipRate  BF-CMratio  BlowFlow  ChipLevel4  \
0    31-00:00    23.10    16.520     121.717  1177.607     169.805   
1    31-01:00    27.60    16.810      79.022  1328.360     341.327   
2    31-02:00    23.19    16.709      79.562  1329.407     239.161   
3    31-03:00    23.60    16.478      81.011  1334.877     213.527   
4    31-04:00    22.90    15.618      93.244  1334.168     243.131   

   T-upperExt-2  T-lowerExt-2  UCZAA  WhiteFlow-4  ...  SteamFlow-4  \
0       358.282       329.545  1.443      599.253  ...       67.122   
1       351.050       329.067  1.549      537.201  ...       60.012   
2       350.022       329.260  1.600      549.611  ...       61.304   
3       350.938       331.142  1.604      623.362  ...       68.496   
4       351.640       332.709    NaN      638.672  ...       70.022   

   Lower-HeatT-3  Upper-HeatT-3  ChipMass-4  WeakLiquorF  BlackFlow-2  \
0        329.432        303.099     175.964     1127.197     1319.039   
1     

2. Filtering Data Based on Conditions

In [7]:
# Boolean mask: True for rows whose 'Y-Kappa' value exceeds 25.
kappa_above_25 = df['Y-Kappa'].gt(25)

# Keep only the rows selected by the mask.
filtered_df = df[kappa_above_25]

# Preview the top five matching rows.
print(filtered_df.head(n=5))

   Observation  Y-Kappa  ChipRate  BF-CMratio  BlowFlow  ChipLevel4  \
1     31-01:00    27.60    16.810      79.022  1328.360     341.327   
12    31-11:00    26.62    15.467      84.447  1334.255     386.971   
13    31-12:00    27.20    16.083      82.839  1332.331     366.855   
15    31-14:00    25.40    16.425      72.924  1197.775     118.821   
40     1-15:00    27.10    13.558      83.117  1175.417     289.256   

    T-upperExt-2  T-lowerExt-2  UCZAA  WhiteFlow-4  ...  SteamFlow-4  \
1        351.050       329.067  1.549      537.201  ...       60.012   
12       349.392       321.021  1.428      531.250  ...       59.407   
13       350.094       327.439  1.486      527.893  ...       60.271   
15       350.765       329.799  1.635      585.011  ...       65.474   
40       339.168       318.386  1.360      480.184  ...       48.568   

    Lower-HeatT-3  Upper-HeatT-3  ChipMass-4  WeakLiquorF  BlackFlow-2  \
1         330.823        304.879     163.202      665.975     1297

3. Handling Missing Values

In [8]:
# Remove every row that contains at least one missing value.
# (axis=0 / how='any' are the pandas defaults, spelled out for clarity.)
df_cleaned = df.dropna(axis=0, how='any')

# Preview the top five rows that survived the drop.
print(df_cleaned.head(n=5))

  Observation  Y-Kappa  ChipRate  BF-CMratio  BlowFlow  ChipLevel4  \
1    31-01:00    27.60    16.810      79.022  1328.360     341.327   
3    31-03:00    23.60    16.478      81.011  1334.877     213.527   
5     1-08:00    14.23    15.350      85.518  1171.604     198.538   
7    31-06:00    22.65    14.100      91.887  1307.852     288.989   
9    31-08:00    24.70    13.850      96.208  1334.892     362.511   

   T-upperExt-2  T-lowerExt-2  UCZAA  WhiteFlow-4  ...  SteamFlow-4  \
1       351.050       329.067  1.549      537.201  ...       60.012   
3       350.938       331.142  1.604      623.362  ...       68.496   
5       344.014       325.195  1.436      628.245  ...       65.225   
7       352.321       331.162  1.468      625.549  ...       71.298   
9       352.372       327.358  1.515      553.172  ...       64.249   

   Lower-HeatT-3  Upper-HeatT-3  ChipMass-4  WeakLiquorF  BlackFlow-2  \
1        330.823        304.879     163.202      665.975     1297.317   
3     

In [11]:
# Strategy A: replace every missing value in the frame with a constant (0).
# Built from the untouched source frame, so the preview printed below is
# exactly `df` with NaN -> 0.
df_filled = df.fillna(0)

# Strategy B: impute a single column with that column's own mean.
# The result goes into a NEW frame rather than being written back into `df`.
# Overwriting `df` here would make the other cells that read it
# (`df.dropna()`, `df.describe()`) give different results depending on
# whether this cell had already been executed.
bf_cm_mean = df['BF-CMratio'].mean()  # pandas skips NaN when averaging by default
df_mean_filled = df.fillna({'BF-CMratio': bf_cm_mean})  # dict -> per-column fill value

# Display the constant-filled DataFrame
print(df_filled.head())

  Observation  Y-Kappa  ChipRate  BF-CMratio  BlowFlow  ChipLevel4  \
0    31-00:00    23.10    16.520     121.717  1177.607     169.805   
1    31-01:00    27.60    16.810      79.022  1328.360     341.327   
2    31-02:00    23.19    16.709      79.562  1329.407     239.161   
3    31-03:00    23.60    16.478      81.011  1334.877     213.527   
4    31-04:00    22.90    15.618      93.244  1334.168     243.131   

   T-upperExt-2  T-lowerExt-2  UCZAA  WhiteFlow-4  ...  SteamFlow-4  \
0       358.282       329.545  1.443      599.253  ...       67.122   
1       351.050       329.067  1.549      537.201  ...       60.012   
2       350.022       329.260  1.600      549.611  ...       61.304   
3       350.938       331.142  1.604      623.362  ...       68.496   
4       351.640       332.709  0.000      638.672  ...       70.022   

   Lower-HeatT-3  Upper-HeatT-3  ChipMass-4  WeakLiquorF  BlackFlow-2  \
0        329.432        303.099     175.964     1127.197     1319.039   
1     

4. Calculating Summary Statistics

In [10]:
# Per-column descriptive statistics (count, mean, std, quartiles, ...).
summary_stats = df.describe()
print(summary_stats)

# Pull the 'Y-Kappa' column out once, then compute its individual statistics.
y_kappa = df['Y-Kappa']
mean_value = y_kappa.mean()
median_value = y_kappa.median()
std_dev = y_kappa.std()

# Report the three statistics on a single line.
print(f"Mean: {mean_value}, Median: {median_value}, Std Dev: {std_dev}")

          Y-Kappa    ChipRate  BF-CMratio     BlowFlow  ChipLevel4  \
count  324.000000  319.000000  307.000000   308.000000  323.000000   
mean    20.635370   14.347937   87.464456  1237.837614  258.164483   
std      3.070036    1.499095    7.995012   100.593735   87.987452   
min     12.170000    9.983000   68.645000     0.000000    0.000000   
25%     18.382500   13.358000   81.823000  1193.215250  213.527000   
50%     20.845000   14.308000   86.739000  1273.138500  271.792000   
75%     23.032500   15.517000   92.372000  1289.196000  321.680000   
max     27.600000   16.958000  121.717000  1351.240000  419.014000   

       T-upperExt-2  T-lowerExt-2       UCZAA  WhiteFlow-4  AAWhiteSt-4  ...  \
count    322.000000    322.000000  299.000000   323.000000   173.000000  ...   
mean     356.904295    324.020180    1.492010   591.732260     6.140410  ...   
std        9.209290      7.621402    0.105923    67.016351     0.081609  ...   
min      339.168000    284.633000    1.182000   4