In [7]:
import pandas as pd
import numpy as np

# Load the data
df = pd.read_csv('penguins.csv')

# Display the first few rows of the DataFrame
print(df.head())

# Preprocessing: Drop rows with missing values
df = df.dropna()

# Example: Let's compute the joint probability distribution of two features, e.g., 'species' and 'island'

# Encode categorical variables
df['species_encoded'] = df['species'].astype('category').cat.codes
df['island_encoded'] = df['island'].astype('category').cat.codes

# Compute the joint probability distribution
joint_counts = pd.crosstab(df['species_encoded'], df['island_encoded'])
joint_probabilities = joint_counts / joint_counts.sum().sum()

print("Joint Probability Distribution:")
print(joint_probabilities)

# If you need to use labels for better readability
species_labels = df['species'].astype('category').cat.categories
island_labels = df['island'].astype('category').cat.categories

# Create a DataFrame for the joint probability distribution with labels
joint_prob_df = pd.DataFrame(joint_probabilities.values, index=species_labels, columns=island_labels)

print("Joint Probability Distribution with labels:")
print(joint_prob_df)
print('Gayatri Kulkarni-53004230002')

  species     island  bill_length_mm  bill_depth_mm  flipper_length_mm  \
0  Adelie  Torgersen            39.1           18.7              181.0   
1  Adelie  Torgersen            39.5           17.4              186.0   
2  Adelie  Torgersen            40.3           18.0              195.0   
3  Adelie  Torgersen             NaN            NaN                NaN   
4  Adelie  Torgersen            36.7           19.3              193.0   

   body_mass_g     sex  year  
0       3750.0    male  2007  
1       3800.0  female  2007  
2       3250.0  female  2007  
3          NaN     NaN  2007  
4       3450.0  female  2007  
Joint Probability Distribution:
island_encoded          0         1         2
species_encoded                              
0                0.132132  0.165165  0.141141
1                0.000000  0.204204  0.000000
2                0.357357  0.000000  0.000000
Joint Probability Distribution with labels:
             Biscoe     Dream  Torgersen
Adelie     0.132132  0

In [8]:
import pandas as pd

# Load the data
df = pd.read_csv('penguins.csv')

# Display the first few rows of the DataFrame
print(df.head())

# Preprocessing: Drop rows with missing values
df = df.dropna()

# Encode categorical variables
df['species_encoded'] = df['species'].astype('category').cat.codes
df['island_encoded'] = df['island'].astype('category').cat.codes

# Compute joint probability distribution
joint_counts = pd.crosstab(df['species_encoded'], df['island_encoded'])
joint_probabilities = joint_counts / joint_counts.sum().sum()

# Compute marginal probabilities
marginal_island_prob = joint_counts.sum(axis=0) / joint_counts.sum().sum()

# Compute conditional probabilities P(Species | Island)
conditional_prob = joint_probabilities.div(marginal_island_prob)

# If you need to use labels for better readability
species_labels = df['species'].astype('category').cat.categories
island_labels = df['island'].astype('category').cat.categories

# Create a DataFrame for the conditional probability distribution with labels
conditional_prob_df = pd.DataFrame(conditional_prob.values, index=species_labels, columns=island_labels)

print("Conditional Probability Distribution P(Species | Island):")
print(conditional_prob_df)
print('Gayatri Kulkarni-53004230002')

  species     island  bill_length_mm  bill_depth_mm  flipper_length_mm  \
0  Adelie  Torgersen            39.1           18.7              181.0   
1  Adelie  Torgersen            39.5           17.4              186.0   
2  Adelie  Torgersen            40.3           18.0              195.0   
3  Adelie  Torgersen             NaN            NaN                NaN   
4  Adelie  Torgersen            36.7           19.3              193.0   

   body_mass_g     sex  year  
0       3750.0    male  2007  
1       3800.0  female  2007  
2       3250.0  female  2007  
3          NaN     NaN  2007  
4       3450.0  female  2007  
Conditional Probability Distribution P(Species | Island):
             Biscoe     Dream  Torgersen
Adelie     0.269939  0.447154        1.0
Chinstrap  0.000000  0.552846        0.0
Gentoo     0.730061  0.000000        0.0
Gayatri Kulkarni-53004230002


In [1]:
pip install pandas


Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd

# Load the penguins dataset from a CSV file
df = pd.read_csv('penguins.csv')

# Preview the data
print("Data Preview:")
print(df.head())

# Create a pivot table for joint probability
# Pivot table will be for Species (rows) and Island (columns), and we'll compute frequencies
pivot_table = pd.crosstab(df['species'], df['island'], normalize=True)

print("\nJoint Probability (Pivot Table):")
print(pivot_table)

# Example: Conditional Probability of Species given Island
# We can normalize along columns to get conditional probabilities
conditional_probability = pivot_table.div(pivot_table.sum(axis=0), axis=1)

print("\nConditional Probability of Species given Island:")
print(conditional_probability)

# To calculate Joint Probability, we already have it in the pivot table, normalized=True gives joint probabilities
print("\nJoint Probability is represented in the pivot table (Species vs Island):")
print(pivot_table)

# Example: Calculating P(Species = Adelie | Island = Biscoe)
p_adelie_given_biscoe = conditional_probability.loc['Adelie', 'Biscoe']
print(f"\nP(Adelie | Biscoe) = {p_adelie_given_biscoe:.4f}")
print('Gayatri Kulkarni-53004230002')

Data Preview:
  species     island  bill_length_mm  bill_depth_mm  flipper_length_mm  \
0  Adelie  Torgersen            39.1           18.7              181.0   
1  Adelie  Torgersen            39.5           17.4              186.0   
2  Adelie  Torgersen            40.3           18.0              195.0   
3  Adelie  Torgersen             NaN            NaN                NaN   
4  Adelie  Torgersen            36.7           19.3              193.0   

   body_mass_g     sex  year  
0       3750.0    male  2007  
1       3800.0  female  2007  
2       3250.0  female  2007  
3          NaN     NaN  2007  
4       3450.0  female  2007  

Joint Probability (Pivot Table):
island       Biscoe     Dream  Torgersen
species                                 
Adelie     0.127907  0.162791   0.151163
Chinstrap  0.000000  0.197674   0.000000
Gentoo     0.360465  0.000000   0.000000

Conditional Probability of Species given Island:
island       Biscoe     Dream  Torgersen
species                  