In [496]:
# Set probe volume in mL.
# This value is the final divisor in the count-per-mL calculations in later cells.
PROBE_VOLUME_ML = 6

In [497]:
import os
import pandas as pd

def load_dfs_from_path(path="example"):
    """Load every ``.csv`` file directly inside ``path`` into one DataFrame.

    Only regular files whose name ends with ``.csv`` are read; sub-directories
    are not searched. Files are read in file-name order so the row order of
    the result is reproducible across machines.

    Parameters
    ----------
    path : str or os.PathLike, default "example"
        Directory to scan for CSV files.

    Returns
    -------
    pandas.DataFrame
        Rows of all CSV files concatenated, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``path`` contains no ``.csv`` files.
    OSError
        If ``path`` cannot be scanned (propagated from ``os.scandir``).
    """
    # Scan the directory the caller asked for, and close the directory
    # iterator deterministically via the context manager.
    with os.scandir(path) as entries:
        csv_paths = sorted(
            entry.path
            for entry in entries
            if entry.is_file() and entry.name.endswith(".csv")
        )

    # pd.concat would raise an opaque "No objects to concatenate" here;
    # raise the same exception type with a message that names the directory.
    if not csv_paths:
        raise ValueError(f"No .csv files found in directory: {path!r}")

    frames = [pd.read_csv(csv_path) for csv_path in csv_paths]
    return pd.concat(frames, ignore_index=True)

In [498]:
import pandas as pd

# Load every CSV file found in the "example" directory into a single DataFrame.
# Single-file alternative: df = pd.read_csv("example/a1_raport.csv")
df = load_dfs_from_path("example")

In [499]:
# Preview the first rows of the loaded measurements.
df.head()

Unnamed: 0,nr.,xpix.,ypix.,xmm,ymm,powierzchniamm,dlugoscmm,szerokoscmm,kat,obwodmm,...,sredn._martinamm,sredn._sitowamm,srednia_jaskrawosc,r,g,b,y,liczenie_obiektow_w,udzial_punktow,pole_obrazu_mm2
0,1,271.5,532.5,27.2,53.2,2.44,3.7,0.8,63.1,8.608326,...,3.7,1.762585,137.2,95.5,149.4,183.2,137.2,1,14.3,19.6
1,2,1226.5,424.5,122.7,42.5,0.51,1.1,0.7,6.33,2.92132,...,1.1,0.801864,117.7,81.3,129.1,153.8,117.6,1,8.6,7.3
2,3,1184.0,687.5,118.4,68.8,0.3,0.7,0.5,114.74,2.297056,...,0.7,0.618039,89.5,62.7,97.3,120.2,89.5,1,6.7,6.0
3,4,796.0,776.0,79.6,77.6,0.84,1.1,0.3,10.58,3.355635,...,1.1,1.031094,100.7,71.9,110.5,125.2,100.7,1,12.6,7.8
4,5,100.0,572.0,10.0,57.2,0.73,1.2,0.8,64.62,3.497056,...,1.2,0.964088,159.8,117.1,175.7,189.9,159.8,1,8.7,10.2


In [500]:
# Scale the raw measurements, drop objects unlikely to be bacteria, classify
# each remaining object into a bacteria type and derive Ob, bialko and wegiel.

# Guard against re-running this cell on an already-processed frame: the scaling
# below divides the measurement columns every time it runs and the result is
# bound back to `df`, so a second run would silently scale the data twice.
if "bacteria_type" in df.columns:
    raise RuntimeError(
        "df has already been processed by this cell; re-run the cell that "
        "loads df before running this cell again."
    )

# Scaling divisors applied to the raw measurement columns.
# NOTE(review): AREA_DIVISOR (2.235) differs from LENGTH_DIVISOR**2
# (1.49**2 = 2.2201) — confirm this is intended.
LENGTH_DIVISOR = 1.49
AREA_DIVISOR = 2.235

# Filtering thresholds, applied to the scaled length and width.
MIN_LENGTH = 0.2
MAX_WIDTH = 1.5

# Classification thresholds and shape constants.
PI_APPROX = 3.14          # kept at 3.14 so results match earlier runs
CURVED_R_MIN = 1.2        # R above this -> "Krzywe"
CURVED_SK_FACTOR = 0.8    # Sk of a "Krzywe" object is Pw * this factor
ROD_RATIO_MIN = 1.5       # D/S above this -> "Pałeczki", otherwise "Ziarniaki"

# Scaling (builds a new frame instead of mutating the loaded one in place)
cells = df.assign(
    dlugoscmm=df['dlugoscmm'] / LENGTH_DIVISOR,
    szerokoscmm=df['szerokoscmm'] / LENGTH_DIVISOR,
    powierzchniamm=df['powierzchniamm'] / AREA_DIVISOR,
)

# Filter out the objects that with high probability are not bacteria.
# .copy() makes the filtered frame independent, so the column assignments
# below never write into a slice of another frame.
keep = (cells['dlugoscmm'] >= MIN_LENGTH) & (cells['szerokoscmm'] <= MAX_WIDTH)
cells = cells[keep].copy()

length = cells['dlugoscmm']
width = cells['szerokoscmm']

# Pw: area of a circle of diameter `width` plus a width x (length - width)
# rectangle. R compares that model area with the measured (scaled) area.
cells['Pw'] = PI_APPROX * (width / 2) ** 2 + width * (length - width)
cells['R'] = cells['Pw'] / cells['powierzchniamm']

# "Krzywe" objects use the width as Dk and a fraction of Pw as Sk;
# every other object keeps its length and width.
is_curved = cells['R'] > CURVED_R_MIN
cells['Dk'] = length.mask(is_curved, width)
cells['Sk'] = width.mask(is_curved, cells['Pw'] * CURVED_SK_FACTOR)
cells['D/S'] = (length / width).astype('float64')

# Round before comparing so floating-point noise in the last digits does not
# push a ratio of exactly 1.5 over the threshold.
is_rod = cells['D/S'].round(15) > ROD_RATIO_MIN
cells['bacteria_type'] = (
    pd.Series("Ziarniaki", index=cells.index)
    .mask(is_rod, "Pałeczki")
    .mask(is_curved, "Krzywe")   # applied last: R takes precedence over D/S
)

# Ob: sphere of diameter Sk plus a cylinder of diameter Sk and height Dk - Sk.
cells['Ob'] = (PI_APPROX * cells['Sk'] ** 3 / 6) + (
    PI_APPROX * (cells['Sk'] ** 2 / 4) * (cells['Dk'] - cells['Sk'])
)

# Keep only positive Ob: Dk - Sk can be negative, and a negative Ob raised to
# the power 0.59 below would give NaN.
cells = cells[cells['Ob'] > 0].copy()
cells['bialko'] = 104.5 * (cells['Ob'] ** 0.59)
cells['wegiel'] = 0.86 * cells['bialko']

df = cells


In [501]:
# Summarise per bacteria type: object count and mean Ob.
grouped = df.groupby("bacteria_type")
result_count = grouped.size().rename("count").reset_index()

# bialko and wegiel are derived from the mean Ob of each type (not averaged
# per object); biomasa multiplies wegiel by the per-mL count.
# result and result_count come from the same grouping, so rows line up by index.
result = grouped["Ob"].agg("mean").reset_index().assign(
    bialko=lambda t: 104.5 * (t['Ob'] ** 0.59),
    wegiel=lambda t: 0.86 * t['bialko'],
    count_in_1_ml=((result_count['count'] * 48097.39) / 10) / (PROBE_VOLUME_ML),
    biomasa=lambda t: (t['wegiel'] * t['count_in_1_ml']) / 1000000,
)

In [502]:
# Number of classified objects per bacteria type.
result_count

Unnamed: 0,bacteria_type,count
0,Krzywe,86
1,Pałeczki,224
2,Ziarniaki,165


In [503]:
# Per-type summary: mean Ob, bialko, wegiel, count per mL and biomasa.
result

Unnamed: 0,bacteria_type,Ob,bialko,wegiel,count_in_1_ml,biomasa
0,Krzywe,0.086847,24.716134,21.255875,68939.592333,1.465371
1,Pałeczki,0.198839,40.293205,34.652156,179563.589333,6.222266
2,Ziarniaki,0.130435,31.419407,27.02069,132267.8225,3.573968


In [504]:
import numpy as np

# Ob class boundaries; with right=True each interval is right-closed,
# e.g. (0, 0.1], (0.1, 0.2], ...
bins = [0, 0.1, 0.2, 0.5, 1.0, float("inf")]
labels = ["<=0.1", "0.1–0.2", "0.2–0.5", "0.5–1.0", ">1.0"]

df["Ob_bucket"] = pd.cut(df["Ob"], bins=bins, labels=labels, right=True)

# Ob_bucket is categorical, so pass `observed` explicitly. observed=False keeps
# every (type, bucket) combination, including empty ones with count 0, which is
# what the implicit default did. Relying on the default emits a FutureWarning,
# and once the default changes the empty groups would disappear and alter the
# diversity terms below.
grouped = df.groupby(["bacteria_type", "Ob_bucket"], observed=False)
result_bio_stats = grouped.size().reset_index(name="count")

total_bacteria_count = np.sum(result_bio_stats['count'])
total_bacteria_count_1_ml = ((total_bacteria_count*48097.39)/10)/(PROBE_VOLUME_ML)

result_bio_stats['count_in_1_ml'] = ((result_bio_stats['count']*48097.39)/10)/(PROBE_VOLUME_ML)

# Share of each group in the total, with +1 added to the group's per-mL count.
# NOTE(review): the +1 presumably keeps empty groups away from log10(0); it
# also means the shares do not sum exactly to 1 — confirm this is intended.
share = (result_bio_stats['count_in_1_ml'] + 1) / total_bacteria_count_1_ml
result_bio_stats['bio_diversity'] = share * np.log10(share)

  grouped = df.groupby(["bacteria_type", "Ob_bucket"])


In [505]:
# Counts, per-mL counts and diversity terms for each (bacteria type, Ob bucket) group.
result_bio_stats

Unnamed: 0,bacteria_type,Ob_bucket,count,count_in_1_ml,bio_diversity
0,Krzywe,<=0.1,67,53708.752167,-0.119983
1,Krzywe,0.1–0.2,11,8817.854833,-0.037873
2,Krzywe,0.2–0.5,6,4809.739,-0.023985
3,Krzywe,0.5–1.0,2,1603.246333,-0.010008
4,Krzywe,>1.0,0,0.0,-1.5e-05
5,Pałeczki,<=0.1,112,89781.794667,-0.147953
6,Pałeczki,0.1–0.2,52,41684.404667,-0.105172
7,Pałeczki,0.2–0.5,43,34469.796167,-0.094441
8,Pałeczki,0.5–1.0,12,9619.478,-0.040361
9,Pałeczki,>1.0,5,4008.115833,-0.020822


In [506]:
# The index is the negated sum of the per-group bio_diversity terms.
shannon_index = -result_bio_stats['bio_diversity'].sum()
print(f"Shannon index: {shannon_index}")

Shannon index: 0.9136913415400122
