In [1]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import linregress


In [2]:
# Read the kinetics measurements from CSV.
# The relative path is resolved against the kernel's current working
# directory. skiprows=1 discards the first line of the file, so the second
# line supplies the column headers.
df = pd.read_csv('Datos_cinetica_06nov2024.csv', skiprows=1)
# Header cells may carry stray padding; trim it so later lookups by column
# name match exactly.
df = df.rename(columns=str.strip)

# Attach measurement units to the short column codes so that tables and plots
# are self-describing. Codes that are not present in the frame are ignored by
# rename (its default behaviour), so a missing column does not fail here.
# NOTE(review): the descriptions follow the notebook's original labels -
# confirm them (in particular "Xm") against the data dictionary.
unit_labels = {
    "T": "T (days)",          # time, days
    "G": "G (g/L)",           # glucose, g/L
    "Gln": "Gln (mmol/L)",    # glutamine, mmol/L
    "Xv": "Xv (cells/mL)",    # viable cell density, cells/mL
    "Xm": "Xm (cells/mL)",    # described as maximum cell density, cells/mL
    "L": "L (g/L)",           # lactate, g/L
    "Glu": "Glu (mmol/L)",    # glutamate, mmol/L
}
df = df.rename(columns=unit_labels)

# Clean and preprocess the data
# Every text column except "Clone" is coerced to numbers. A '%' sign is
# stripped before parsing (so "85%" becomes 85.0, not 0.85) and any entry that
# still cannot be parsed becomes NaN. Columns that are already numeric are
# left untouched.
for col in df.columns:
    if col == "Clone":
        continue  # identifier column: handled separately below
    values = df[col]
    # Treat both classic object columns and pandas' dedicated string dtype as
    # text; checking only `dtype == object` would skip the latter.
    is_text = pd.api.types.is_object_dtype(values) or isinstance(values.dtype, pd.StringDtype)
    if not is_text:
        continue
    # Cast to str before using the .str accessor: on an object column holding
    # a mix of strings and real numbers, .str methods return NaN for the
    # non-string entries, which would silently discard valid measurements.
    # Missing values turn into text such as "nan" and are coerced back to NaN.
    as_text = values.astype(str).str.replace('%', '', regex=False).str.strip()
    df[col] = pd.to_numeric(as_text, errors='coerce')

# Store "Clone" as a categorical: it is a label, not a measurement.
df["Clone"] = df["Clone"].astype("category")

# Adjust glutamine (Gln) and glucose (G) readings taken right after a feeding.
# Rows whose time stamp is not a whole number of days (e.g. 1.5) are treated
# as feeding samples. On those rows, a reading below its threshold is replaced
# by a value drawn uniformly from the range listed in FEED_ADJUSTMENTS.
# NOTE(review): the glucose range 4.75-5.95 g/L is kept from the original
# code, but it is not "5 +/- 15%" (that would be 4.25-5.75) - confirm the
# intended bounds.
FEED_SEED = 42  # fixed seed so Restart & Run All reproduces the same table
FEED_ADJUSTMENTS = (
    # (column, replace-if-below threshold, lower bound, upper bound)
    ("Gln (mmol/L)", 2, 1.8, 2.2),
    ("G (g/L)", 5, 4.75, 5.95),
)
feed_rng = np.random.default_rng(FEED_SEED)

# NaN % 1 is NaN and NaN != 0 evaluates to True, so rows with a missing time
# must be excluded explicitly or they would be mistaken for feeding samples.
sample_time = df['T (days)']
is_feeding = sample_time.notna() & (sample_time % 1 != 0)
feeding_times = df.index[is_feeding]  # kept for any later cell that uses it

for column, threshold, low, high in FEED_ADJUSTMENTS:
    # Comparisons against NaN are False, so missing readings stay untouched.
    needs_fix = is_feeding & (df[column] < threshold)
    if needs_fix.any():
        # The replacements are floats; make the column float up front so the
        # assignment never has to upcast an integer column implicitly.
        df[column] = df[column].astype(float)
        df.loc[needs_fix, column] = feed_rng.uniform(low, high, size=int(needs_fix.sum()))

# Display the cleaned and processed dataset
display(df)

FileNotFoundError: [Errno 2] No such file or directory: 'Datos_cinetica_06nov2024.csv'