In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

file_path = "C://Users//dilar//LivestockAnalysis//livestock_data_2024.xls"

# Read data from the Excel file, sheet "3.", skip the first 4 header rows, read 19 rows
df_data = pd.read_excel(file_path, sheet_name="3.", header=4, nrows=19)

# Rename columns for clarity
df_data.columns = ['Oblast', 'Total_Livestock', 'Cattle', 'Sheep', 'Goats', 'Pigs', 'Horses', 'Poultry', 'Camels', 'Marals', 'Other_Animals']

# Drop the column "Marals" as it's not needed
df_data = df_data.drop(df_data.columns[9], axis=1)

# Drop rows with indexes 0 and 1 (likely containing totals or notes)
df_data.drop(0, inplace=True)  # Remove the row with index 0
df_data.drop(1, inplace=True)  # Remove the row with index 1

# Replace '-' with '0' in selected columns (missing or unavailable values)
df_data["Camels"] = df_data["Camels"].replace('-', '0')
df_data["Other_Animals"] = df_data["Other_Animals"].replace('-', '0')
df_data["Pigs"] = df_data["Pigs"].replace('-', '0')

# Display the first 17 rows of the dataframe
display(df_data.head(17))

# Convert numerical columns to float type
for col in df_data.columns[1:]:
    df_data[col] = pd.to_numeric(df_data[col], errors='coerce')  # General info

# Print basic info about the dataframe
print("\nInfo about collected data:")
print(df_data.info())

# Print statistical summary of the numerical data
print("\nData description:")
print(df_data.describe())

# Check for missing values in the dataset
print("\nMissing values:")
print(df_data.isnull().sum())

plt.figure(figsize=(14, 8))

# Sort data by total livestock in descending order
sorted_data = df_data.sort_values('Total_Livestock', ascending=False)

# Create a bar plot for total livestock by region
sns.barplot(x='Total_Livestock', y='Oblast', data=sorted_data, hue='Oblast', palette='viridis', legend=False)
plt.title('Number of slaughtered cattle by regions of Kazakhstan', fontsize=16)
plt.ylabel('Region', fontsize=14)
plt.xlabel('Quantity', fontsize=14)
plt.grid(axis='x', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

# List of animal types (columns) to aggregate
animal_types = ['Cattle', 'Sheep', 'Goats', 'Pigs', 'Horses', 'Poultry', 'Camels', 'Other_Animals']
# Sum total for each animal type and sort descending
total_by_type = df_data[animal_types].sum().sort_values(ascending=False)

plt.figure(figsize=(14, 8))
# Create a bar plot for total by animal type
ax = sns.barplot(x=total_by_type.index, y=total_by_type.values, hue=total_by_type.index,  palette='coolwarm', legend=False)
plt.title('Number of slaughtered cattle by the type of cattle', fontsize=16)
plt.xlabel('Type of cattle', fontsize=14)
plt.ylabel('Quantity', fontsize=14)
plt.xticks(rotation=45, ha='right')

# Add numeric labels to each bar
for i, v in enumerate(total_by_type.values):
    ax.text(i, v + 5000, f'{v:,.0f}', ha='center', fontsize=12)

plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

ImportError: Missing optional dependency 'fsspec'.  Use pip or conda to install fsspec.