In [2]:
import pandas as pd
import numpy as np

# --- 1. Define Parameters for Hypothetical Data ---

features = {
    'Mitochondria_Area': {
        'Fragmented': {'mean': 20, 'std': 5},
        'Tubular': {'mean': 80, 'std': 15},
        'Intermediate': {'mean': 50, 'std': 10},
        'Hypertubular': {'mean': 150, 'std': 25}
    },
    'Aspect_Ratio': {
        'Fragmented': {'mean': 1.5, 'std': 0.3},
        'Tubular': {'mean': 5, 'std': 1},
        'Intermediate': {'mean': 2.5, 'std': 0.5},
        'Hypertubular': {'mean': 8, 'std': 1.5}
    },
    'Form_Factor': {
        'Fragmented': {'mean': 0.8, 'std': 0.1},
        'Tubular': {'mean': 0.2, 'std': 0.05},
        'Intermediate': {'mean': 0.5, 'std': 0.15},
        'Hypertubular': {'mean': 0.1, 'std': 0.03}
    },
    'Network_Branching': {
        'Fragmented': {'mean': 1, 'std': 0.5},
        'Tubular': {'mean': 5, 'std': 2},
        'Intermediate': {'mean': 2, 'std': 1},
        'Hypertubular': {'mean': 10, 'std': 3}
    },
    'Mitochondria_Count': {
        'Fragmented': {'mean': 20, 'std': 5},
        'Tubular': {'mean': 5, 'std': 2},
        'Intermediate': {'mean': 12, 'std': 3},
        'Hypertubular': {'mean': 3, 'std': 1}
    }
}

n_samples = 100

# --- 2. Generate the Data ---

data = []

for morphology in ['Fragmented', 'Tubular', 'Intermediate', 'Hypertubular']:
    for _ in range(n_samples):
        sample = {'Morphology': morphology}
        for feature_name, feature_params in features.items():
            mean = feature_params[morphology]['mean']
            std = feature_params[morphology]['std']
            sample[feature_name] = np.random.normal(mean, std)
        data.append(sample)

# --- 3. Convert to Pandas DataFrame ---

df = pd.DataFrame(data)

# --- 4. Inspect the Data (Optional) ---

print(df.head())
print(df['Morphology'].value_counts())

# --- 5. Save to CSV File ---

df.to_csv('mitochondria_morphology_data.csv', index=False)

print("Data saved to mitochondria_morphology_data.csv")

   Morphology  Mitochondria_Area  Aspect_Ratio  Form_Factor  \
0  Fragmented          13.385032      1.195282     1.011433   
1  Fragmented          20.924661      1.408699     0.774042   
2  Fragmented          22.349468      1.188097     0.956764   
3  Fragmented          17.426156      1.519802     0.829090   
4  Fragmented          19.513021      1.529458     0.972451   

   Network_Branching  Mitochondria_Count  
0           1.053668           26.060329  
1           0.777228           25.332761  
2           1.778024           15.122217  
3           1.376439           24.559683  
4           1.416382           23.052112  
Morphology
Fragmented      100
Tubular         100
Intermediate    100
Hypertubular    100
Name: count, dtype: int64
Data saved to mitochondria_morphology_data.csv


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the data from the CSV file
df = pd.read_csv('mitochondria_morphology_data.csv')

# Separate features (X) and target (y)
X = df.drop('Morphology', axis=1)  # All columns except 'Morphology' are features
y = df['Morphology']            # The 'Morphology' column is our target

# --- Preprocessing (Scaling) ---
# Scale the features so that they have zero mean and unit variance
scaler = StandardScaler()
X = scaler.fit_transform(X)  # Fit the scaler on the data and transform

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Choose a Model ---
model = GaussianNB()

# --- Train the Model ---
model.fit(X_train, y_train)

# --- Make Predictions ---
y_pred = model.predict(X_test)

# --- Evaluate the Model ---
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print(classification_report(y_test, y_pred))

Accuracy: 1.0
              precision    recall  f1-score   support

  Fragmented       1.00      1.00      1.00        26
Hypertubular       1.00      1.00      1.00        18
Intermediate       1.00      1.00      1.00        18
     Tubular       1.00      1.00      1.00        18

    accuracy                           1.00        80
   macro avg       1.00      1.00      1.00        80
weighted avg       1.00      1.00      1.00        80



In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the data from the CSV file
df = pd.read_csv('mitochondria_morphology_data.csv')

# Separate features (X) and target (y)
X = df.drop('Morphology', axis=1)  # All columns except 'Morphology' are features
y = df['Morphology']            # The 'Morphology' column is our target

# --- Preprocessing (Scaling) ---
# Scale the features so that they have zero mean and unit variance
scaler = StandardScaler()
X = scaler.fit_transform(X)  # Fit the scaler on the data and transform

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Choose a Model ---
model = GaussianNB()

# --- Train the Model ---
model.fit(X_train, y_train)

# --- Make Predictions ---
y_pred = model.predict(X_test)

# --- Evaluate the Model ---
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print(classification_report(y_test, y_pred))

Accuracy: 1.0
              precision    recall  f1-score   support

  Fragmented       1.00      1.00      1.00        26
Hypertubular       1.00      1.00      1.00        18
Intermediate       1.00      1.00      1.00        18
     Tubular       1.00      1.00      1.00        18

    accuracy                           1.00        80
   macro avg       1.00      1.00      1.00        80
weighted avg       1.00      1.00      1.00        80

