# Plantation Data Analysis and Visualization

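This notebook loads plantation records from a GeoJSON file and explores them through spatial, temporal, growth, area, species, statistical, and correlation analyses, with a visualization for each.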

In [2]:
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import contextily as ctx

# Set plotting style
# plt.style.use('seaborn')
# sns.set_palette('husl')

In [9]:
!pip install pyogrio

Collecting pyogrio
  Downloading pyogrio-0.10.0.tar.gz (281 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 281.9/281.9 kB 1.6 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Building wheels for collected packages: pyogrio
  Building wheel for pyogrio (pyproject.toml) ... done
  Created wheel for pyogrio: filename=pyogrio-0.10.0-cp311-cp311-macosx_10_9_x86_64.whl size=626589 sha256=f6a8c125b2143c00cfd1d50b99ab55ba3b138525e18f2ccdab04f591e2999dc2
  Stored in directory: /Users/abhishek/Library/Caches/pip/wheels/72/55/75/093bef8c5bd7e716d6ae56df939ff12f65594ee52d6597b250
Successfully built pyogrio
Installing collected packages: pyogrio
Successfully installed pyogrio-0.10.0


In [10]:
import pyogrio
# import fiona

ImportError: dlopen(/Users/abhishek/anaconda3/envs/dl/lib/python3.11/site-packages/pyogrio/_vsi.cpython-311-darwin.so, 0x0002): Library not loaded: @rpath/libpoppler.126.dylib
  Referenced from: <413E6A19-458B-3234-A9DD-C70570AB464D> /Users/abhishek/anaconda3/envs/dl/lib/libgdal.32.3.6.2.dylib
  Reason: tried: '/Users/abhishek/anaconda3/envs/dl/lib/libpoppler.126.dylib' (no such file), '/Users/abhishek/anaconda3/envs/dl/lib/libpoppler.126.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Users/abhishek/anaconda3/envs/dl/lib/libpoppler.126.dylib' (no such file), '/Users/abhishek/anaconda3/envs/dl/lib/libpoppler.126.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Users/abhishek/anaconda3/envs/dl/lib/libpoppler.126.dylib' (no such file), '/Users/abhishek/anaconda3/envs/dl/lib/libpoppler.126.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Users/abhishek/anaconda3/envs/dl/lib/libpoppler.126.dylib' (no such file), '/Users/abhishek/anaconda3/envs/dl/lib/libpoppler.126.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Users/abhishek/anaconda3/envs/dl/lib/libpoppler.126.dylib' (no such file), '/Users/abhishek/anaconda3/envs/dl/bin/../lib/libpoppler.126.dylib' (no such file), '/Users/abhishek/anaconda3/envs/dl/bin/../lib/libpoppler.126.dylib' (no such file), '/usr/local/lib/libpoppler.126.dylib' (no such file), '/usr/lib/libpoppler.126.dylib' (no such file, not in dyld cache)

## 1. Data Loading and Initial Analysis

In [11]:
# Read the plantation features from the GeoJSON file into a GeoDataFrame
gdf = gpd.read_file('Plantations Data.geojson')

# Summarise the dataset: row count, distinct farmer names, and summed plot area
overview_lines = (
    "Dataset Overview:",
    f"Number of plantations: {len(gdf)}",
    f"Number of unique farmers: {gdf['FARMER NAME'].nunique()}",
    f"Total plantation area: {gdf['PLOT AREA'].sum():.2f} hectares",
)
print(*overview_lines, sep="\n")

# Preview the leading rows (rendered as the cell's output)
gdf.head()

ImportError: The 'read_file' function requires the 'pyogrio' or 'fiona' package, but neither is installed or imports correctly.
Importing fiona resulted in: dlopen(/Users/abhishek/anaconda3/envs/dl/lib/python3.11/site-packages/fiona/_env.cpython-311-darwin.so, 0x0002): Library not loaded: @rpath/libpoppler.126.dylib
  Referenced from: <413E6A19-458B-3234-A9DD-C70570AB464D> /Users/abhishek/anaconda3/envs/dl/lib/libgdal.32.3.6.2.dylib
  Reason: tried: '/Users/abhishek/anaconda3/envs/dl/lib/libpoppler.126.dylib' (no such file), '/Users/abhishek/anaconda3/envs/dl/lib/python3.11/site-packages/fiona/../../../libpoppler.126.dylib' (no such file), '/Users/abhishek/anaconda3/envs/dl/lib/python3.11/site-packages/fiona/../../../libpoppler.126.dylib' (no such file), '/Users/abhishek/anaconda3/envs/dl/bin/../lib/libpoppler.126.dylib' (no such file), '/Users/abhishek/anaconda3/envs/dl/bin/../lib/libpoppler.126.dylib' (no such file), '/usr/local/lib/libpoppler.126.dylib' (no such file), '/usr/lib/libpoppler.126.dylib' (no such file, not in dyld cache)
Importing pyogrio resulted in: No module named 'pyogrio'

## 2. Spatial Distribution Analysis

In [None]:
# Create interactive map
# Centre the map on the midpoint of the dataset's bounding box. Calling
# `.centroid` on longitude/latitude geometries makes GeoPandas warn that the
# result is likely incorrect in a geographic CRS; the bounds midpoint avoids
# that and is sufficient for choosing an initial viewport.
# NOTE(review): assumes gdf coordinates are lon/lat degrees, as folium expects — confirm.
min_lon, min_lat, max_lon, max_lat = gdf.total_bounds
center_lat = float((min_lat + max_lat) / 2)
center_lon = float((min_lon + max_lon) / 2)

m = folium.Map(location=[center_lat, center_lon], zoom_start=10)

# Add plantations to map: one GeoJson layer per row, each with an HTML popup
for idx, row in gdf.iterrows():
    popup_html = f"Farmer: {row['FARMER NAME']}<br>Area: {row['PLOT AREA']} ha<br>Growth: {row['PLTN GROWTH']}"
    folium.GeoJson(
        row.geometry.__geo_interface__,
        # Wrap the HTML in an explicit Popup object.
        # NOTE(review): depending on the folium version, a bare string passed
        # as `popup=` to GeoJson may not be attached — confirm.
        popup=folium.Popup(popup_html, max_width=300),
    ).add_to(m)

# Last expression: render the map as the cell output
m

## 3. Temporal Analysis

In [None]:
# Parse both date columns into datetimes, writing the result back onto gdf
for date_col in ('PLANTING DATE', 'COPPICING DATE'):
    gdf[date_col] = pd.to_datetime(gdf[date_col])

# Number of plantations per planting date, ordered by date
plantings_per_date = gdf['PLANTING DATE'].value_counts().sort_index()

# Draw the timeline on a wide figure
plt.figure(figsize=(15, 6))
timeline_ax = plantings_per_date.plot(kind='line')
timeline_ax.set_title('Plantation Timeline')
timeline_ax.set_xlabel('Date')
timeline_ax.set_ylabel('Number of Plantations')
plt.xticks(rotation=45)
plt.show()

## 4. Growth Analysis

In [None]:
# Bar chart: how many plantations fall into each growth-quality category
plt.figure(figsize=(10, 6))
growth_count_ax = sns.countplot(x='PLTN GROWTH', data=gdf)
growth_count_ax.set_title('Distribution of Plantation Growth Quality')
plt.xticks(rotation=45)
plt.show()

# Box plot: spread of average height within each growth-quality category
plt.figure(figsize=(12, 6))
height_box_ax = sns.boxplot(x='PLTN GROWTH', y='AVG HEIGHT', data=gdf)
height_box_ax.set_title('Height Distribution by Growth Quality')
plt.show()

## 5. Area Analysis

In [None]:
# Histogram of individual plot areas (30 bins)
plt.figure(figsize=(12, 6))
area_hist_ax = sns.histplot(x='PLOT AREA', data=gdf, bins=30)
area_hist_ax.set_title('Distribution of Plot Areas')
area_hist_ax.set_xlabel('Area (hectares)')
plt.show()

# Summed plot area per district, smallest first so the largest bar ends up on top
district_area = gdf.groupby('DISTRICT')['PLOT AREA'].sum().sort_values()
plt.figure(figsize=(12, 8))
district_ax = district_area.plot.barh()
district_ax.set_title('Total Plantation Area by District')
district_ax.set_xlabel('Total Area (hectares)')
plt.show()

## 6. Species Analysis

In [None]:
# Species distribution: share of plantations per tree species
species_counts = gdf['TREE SPECIES'].value_counts()
fig, ax = plt.subplots(figsize=(10, 6))
species_counts.plot(kind='pie', autopct='%1.1f%%', ax=ax)
ax.set_title('Distribution of Tree Species')
plt.show()

# Growth quality by species (stacked bars: one bar per species, one segment per
# growth category).
# DataFrame.plot opens its own figure unless it is handed an Axes, so a
# preceding bare plt.figure(figsize=...) is left empty and its size is ignored.
# Create the Axes explicitly and pass it in so the chart uses the intended size.
growth_by_species = pd.crosstab(gdf['TREE SPECIES'], gdf['PLTN GROWTH'])
fig, ax = plt.subplots(figsize=(12, 6))
growth_by_species.plot(kind='bar', stacked=True, ax=ax)
ax.set_title('Growth Quality Distribution by Species')
plt.xticks(rotation=45)
ax.legend(title='Growth Quality')
plt.show()

## 7. Statistical Analysis

In [None]:
# Column reused by several of the summary figures below
plot_area = gdf['PLOT AREA']

# Headline numbers for the dataset, keyed by a human-readable label
stats = {
    'Total Plantations': len(gdf),
    'Total Area': plot_area.sum(),
    'Average Plot Size': plot_area.mean(),
    'Median Plot Size': plot_area.median(),
    'Number of Districts': gdf['DISTRICT'].nunique(),
    'Number of Farmers': gdf['FARMER NAME'].nunique(),
    'Average Height': gdf['AVG HEIGHT'].mean(),
    # Nested mapping of growth category -> number of plantations
    'Growth Quality Distribution': gdf['PLTN GROWTH'].value_counts().to_dict(),
}

# Show the summary as a labelled Series (the cell's output)
pd.Series(stats)

## 8. Correlation Analysis

In [None]:
# Columns included in the pairwise correlation
numerical_cols = [
    'PLOT AREA',
    'AVG HEIGHT',
    'Row to Row SPACING',
    'Plant to Plant SPACING',
    'Age',
]
correlation_matrix = gdf.loc[:, numerical_cols].corr()

# Annotated heatmap with a diverging palette centred on zero correlation
plt.figure(figsize=(10, 8))
heatmap_ax = sns.heatmap(
    correlation_matrix,
    cmap='coolwarm',
    center=0,
    annot=True,
)
heatmap_ax.set_title('Correlation Matrix of Numerical Variables')
plt.show()