In [2]:
import numpy as np

import pandas as pd
from pandas.api.types import CategoricalDtype

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
# Silence Python warnings from here on: no category, message, or module filter is
# passed, so the "ignore" action applies to every warning.
# NOTE(review): this presumably also hides pandas deprecation warnings raised by the
# cells below — consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

import zipfile
import os

# Load the wine-grape production table from a CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer="california_wine_production.csv")

# Preview the first five rows to check the columns that were read.
data.head(n=5)

Unnamed: 0,Year,CommodityCode,CropName,CountyCode,County,HarvestedAcres,YieldUnitAcre,Production,PriceDollarsUnit,Unit,ValueDollars,lat,long
0,1980,216299,GRAPESWINE,1,Alameda,2530.0,5.14,13000.0,1497.69,TONS,19470000,37.6469,-121.8888
1,1981,216299,GRAPESWINE,1,Alameda,3390.0,5.34,18100.0,1503.04,TONS,27205000,37.6469,-121.8888
2,1982,216299,GRAPESWINE,1,Alameda,3390.0,5.34,18100.0,1503.04,TONS,27205000,37.6469,-121.8888
3,1983,216299,GRAPESWINE,1,Alameda,2420.0,5.08,12300.0,1447.32,TONS,17802000,37.6469,-121.8888
4,1984,216299,GRAPESWINE,1,Alameda,2630.0,4.83,12700.0,1432.2,TONS,18189000,37.6469,-121.8888


In [3]:
# Goal: the Year labels need to run in the opposite direction within each county.
# Before changing anything, look at the year / acreage / yield columns per county;
# head() on a grouped selection returns the first five rows of every group.
data_year = data.groupby(by='County')[
    ['Year', 'HarvestedAcres', 'YieldUnitAcre']
]
data_year.head(n=5)

Unnamed: 0,Year,HarvestedAcres,YieldUnitAcre
0,1980,2530.0,5.14
1,1981,3390.0,5.34
2,1982,3390.0,5.34
3,1983,2420.0,5.08
4,1984,2630.0,4.83
...,...,...,...
1301,1998,57.0,2.19
1302,1999,76.0,4.30
1303,2000,91.0,3.87
1304,2001,91.0,1.27


In [4]:
def reversing(x):
    """Return a copy of ``x`` whose ``Year`` values are in reverse row order.

    Only the ``Year`` column changes; every other column and the index are
    carried over as they are. The input frame is left unmodified, which keeps
    the function safe to use as a ``groupby(...).apply`` callback (pandas does
    not support callbacks that mutate the group they receive).

    Parameters
    ----------
    x : pandas.DataFrame
        Frame containing a ``Year`` column.

    Returns
    -------
    pandas.DataFrame
        New frame with the ``Year`` column reversed by position.
    """
    # to_numpy() strips the index so the reversed values are assigned by
    # position; a Series would be re-aligned on its index and undo the flip.
    return x.assign(Year=x['Year'].iloc[::-1].to_numpy())

# Flip the Year labels within every county, leaving all other columns untouched.
# Only the Year column is transformed instead of running groupby(...).apply over the
# whole frame: transform() always returns values aligned to the original row index,
# so the result does not depend on the pandas-version-specific handling of
# `group_keys` (County prepended to the index from pandas 2.0) or of grouping
# columns inside apply(). The County column and the flat index are kept as they are.
# NOTE: this cell is not idempotent — running it a second time flips the years back.
data = data.assign(
    Year=data.groupby('County', sort=False)['Year'].transform(
        # to_numpy() makes the reversed values positional within the group
        lambda years: years.iloc[::-1].to_numpy()
    )
)
data.head()

Unnamed: 0,Year,CommodityCode,CropName,CountyCode,County,HarvestedAcres,YieldUnitAcre,Production,PriceDollarsUnit,Unit,ValueDollars,lat,long
0,2020,216299,GRAPESWINE,1,Alameda,2530.0,5.14,13000.0,1497.69,TONS,19470000,37.6469,-121.8888
1,2019,216299,GRAPESWINE,1,Alameda,3390.0,5.34,18100.0,1503.04,TONS,27205000,37.6469,-121.8888
2,2018,216299,GRAPESWINE,1,Alameda,3390.0,5.34,18100.0,1503.04,TONS,27205000,37.6469,-121.8888
3,2017,216299,GRAPESWINE,1,Alameda,2420.0,5.08,12300.0,1447.32,TONS,17802000,37.6469,-121.8888
4,2016,216299,GRAPESWINE,1,Alameda,2630.0,4.83,12700.0,1432.2,TONS,18189000,37.6469,-121.8888


In [5]:
# Persist the corrected table; index=False leaves the row index out of the file.
# NOTE(review): the target name ends in ".zip" — with pandas' default
# compression='infer' this should be written as a zip archive rather than a
# plain-text CSV; confirm that is the intended format.
data.to_csv(path_or_buf='california_wine_production.zip', index=False)

In [6]:
# Report the smallest and largest value of each measurement column.
# Each entry pairs the printed heading with the column it summarises; for every
# pair the heading is printed first, then the minimum, then the maximum.
for heading, column in (
    ('Harvested Acres', 'HarvestedAcres'),
    ('Yield (Unit/Acre)', 'YieldUnitAcre'),
    ('Production', 'Production'),
):
    print(heading)
    print(np.min(data[column]))
    print(np.max(data[column]))

Harvested Acres
3.0
128613.0
Yield (Unit/Acre)
0.06
25.0
Production
23.0
1040100.0


In [7]:
# Per-county totals over all rows, used to report the largest and smallest
# cumulative crop value.
# numeric_only=True restricts the aggregation to numeric columns, so text columns
# such as CropName and Unit are not string-concatenated on pandas >= 2.0 (where the
# default became numeric_only=False); the result is the same on every version.
# NOTE: totals are still produced for identifier-like numeric columns (Year,
# CommodityCode, CountyCode, lat, long) but those sums carry no meaning.
sums = data.groupby('County').sum(numeric_only=True)
print(np.max(sums['ValueDollars']))
print(np.min(sums['ValueDollars']))


14101751300
216800
