In [1]:
import pandas as pd
import geopandas as gpd
import shapely
import glob
from shapely.geometry import Polygon
import rasterstats
from rasterstats import zonal_stats

In [2]:
#part 1, step 1:
#creating a list of x,y pairs for each district

def read_xy(path):
    """Read x,y coordinate pairs from a whitespace-separated text file.

    Lines whose first token is 'X' (the header row) and blank lines are
    skipped. Only the first two tokens of each remaining line are used.

    Parameters
    ----------
    path : str
        Path of the coordinate file to read.

    Returns
    -------
    list of (float, float)
        One (x, y) tuple per data line, in file order.

    Raises
    ------
    ValueError
        If a token on a data line cannot be converted to float.
    IndexError
        If a non-blank data line has fewer than two tokens.
    """
    xy = []
    with open(path) as src:
        for line in src:
            coords = line.split()
            # a blank line splits to [], so coords[0] would raise IndexError
            if not coords or coords[0] == 'X':
                continue
            xy.append((float(coords[0]), float(coords[1])))
    return xy


d01_xy = read_xy('district01.txt')
d05_xy = read_xy('district05.txt')
d06_xy = read_xy('district06.txt')

In [3]:
#part 1, step 2:
#turning each district's coordinate list into a shapely polygon

d01_poly, d05_poly, d06_poly = (
    Polygon(xy) for xy in (d01_xy, d05_xy, d06_xy)
)


#master data dictionary: one list per column, entries aligned by district

districts = dict(
    dist=['01', '05', '06'],
    num_coords=[len(xy) for xy in (d01_xy, d05_xy, d06_xy)],
    geom=[d01_poly, d05_poly, d06_poly],
)


#plain data frame first, then a geodataframe that uses the 'geom' column
#as its geometry and is tagged with the EPSG:4326 coordinate reference system

df = pd.DataFrame(districts)
gdf = gpd.GeoDataFrame(df, geometry='geom', crs="EPSG:4326")

In [4]:
#part 2, step 1:
#creating the final dictionary to hold the final output
#doing the maths to calculate total pixels, agricultural pixels, and percentage agriculture in each district in 2004 and 2009

# glob.glob returns paths in arbitrary (OS-dependent) order, so the list is
# sorted to make the file <-> year pairing below deterministic.
# NOTE(review): assumes the sorted file names put the 2004 raster before the
# 2009 raster -- confirm against the actual names in ../lab2/.
files = sorted(glob.glob('../lab2/*.tif'))
years = ['2004', '2009']

# fail early with a clear message instead of a length-mismatch error when
# the results frame is built
if len(files) != len(years):
    raise ValueError(
        f"expected {len(years)} .tif rasters in ../lab2/ (one per year in {years}), "
        f"found {len(files)}: {files}"
    )

results = {'districts': [],
           'year': [],
           'percent_ag': []
          }

# district labels come from gdf so they always follow the row order that
# zonal_stats uses for its output
district_ids = list(gdf['dist'])

for year, file_name in zip(years, files):
    # one dict per district with the 'sum' and 'count' of the raster cells inside it
    # NOTE(review): 'sum' equals the number of agricultural pixels only if the
    # raster is coded 1 = agriculture, 0 = other -- confirm.
    zs = zonal_stats(gdf, file_name, stats=['sum', 'count'])
    for dist, stat in zip(district_ids, zs):
        ag_pixels = stat['sum']
        total_pixels = stat['count']
        # a district with no valid pixels would otherwise raise on the division
        if total_pixels:
            percent_ag = (ag_pixels / total_pixels) * 100
        else:
            percent_ag = float('nan')
        results['districts'].append(dist)
        results['year'].append(year)
        results['percent_ag'].append(percent_ag)

df_results = pd.DataFrame(results)

In [5]:
#part 2, step 2:
#displaying the final results table (district, year, percent agriculture)
#a bare expression on the last line of a cell is rendered by the notebook; no print() is needed

df_results

Unnamed: 0,districts,year,percent_ag
0,1,2004,40.002673
1,5,2004,35.722357
2,6,2004,45.835598
3,1,2009,55.108631
4,5,2009,34.700027
5,6,2009,45.32206
