In [1]:
# imports
import os
import glob
import pandas as pd 
import altair as alt
# import altair_viewer

In [2]:
# Load every processed yearly price CSV and stack them into one DataFrame.
# NOTE(review): `path` is an absolute, machine-specific location, so this cell
# only runs where that directory exists -- consider making it configurable.
path = (
    "C:/Users/jacob.hester/Documents/Python_scripts/"
    "Historical_beef_cattle_prices/data/processed"
    )
raw_files_path = os.path.join(path, "*.csv")

# glob returns matches in arbitrary order; sort them so the row order of the
# concatenated frame is reproducible between runs and machines.
csv_files = sorted(glob.glob(raw_files_path))
if not csv_files:
    # Name the pattern that matched nothing instead of letting pd.concat fail
    # with its generic "No objects to concatenate" error.
    raise FileNotFoundError(
        "No CSV files matched {!r}".format(raw_files_path)
    )

df_list = []
for f in csv_files:
    print(f)
    # Use a distinct name for the per-file frame so it does not shadow the
    # combined `df` built below.
    yearly_df = pd.read_csv(f, index_col=None, header=0)
    df_list.append(yearly_df)
df = pd.concat(df_list, ignore_index = True)

C:/Users/jacob.hester/Documents/Python_scripts/Historical_beef_cattle_prices/data/processed\2004_prices.csv
C:/Users/jacob.hester/Documents/Python_scripts/Historical_beef_cattle_prices/data/processed\2005_prices.csv
C:/Users/jacob.hester/Documents/Python_scripts/Historical_beef_cattle_prices/data/processed\2006_prices.csv
C:/Users/jacob.hester/Documents/Python_scripts/Historical_beef_cattle_prices/data/processed\2007_prices.csv
C:/Users/jacob.hester/Documents/Python_scripts/Historical_beef_cattle_prices/data/processed\2008_prices.csv
C:/Users/jacob.hester/Documents/Python_scripts/Historical_beef_cattle_prices/data/processed\2009_prices.csv
C:/Users/jacob.hester/Documents/Python_scripts/Historical_beef_cattle_prices/data/processed\2011_prices.csv
C:/Users/jacob.hester/Documents/Python_scripts/Historical_beef_cattle_prices/data/processed\2012_prices.csv
C:/Users/jacob.hester/Documents/Python_scripts/Historical_beef_cattle_prices/data/processed\2013_prices.csv
C:/Users/jacob.hester/Docume

In [3]:
# check data types
# Lists the dtype pandas inferred for each column of the combined frame, so
# columns that were not parsed as numeric (reported as `object`) can be spotted.
df.dtypes

Year              int64
Type             object
Weight_group     object
Hd Cnt           object
Jan             float64
Feb             float64
Mar             float64
Apr             float64
May             float64
Jun             float64
Jul             float64
Aug             float64
Sep             float64
Oct             float64
Nov             float64
Dec             float64
Avg             float64
dtype: object

In [4]:
# Rename the 'Hd Cnt' column to 'Hd_cnt' and convert it to float.
# NOTE: column was read in as object type because of commas in head counts.
#
# rename() returns a new frame and ignores labels that are not present, so
# this line is safe to run again after the column has already been renamed.
df = df.rename(columns = {'Hd Cnt' : 'Hd_cnt'})

# Only strip commas while the column still holds strings. The .str accessor
# raises on a numeric column, which is what happens when this cell is re-run
# or when the column was already parsed as numbers.
if not pd.api.types.is_numeric_dtype(df['Hd_cnt']):
    # regex = False: treat ',' as a literal character, not a pattern.
    df['Hd_cnt'] = df['Hd_cnt'].str.replace(',', '', regex = False)

data_types_dictionary = {'Hd_cnt' : float}
df = df.astype(data_types_dictionary)

In [5]:
# check for missing values
# Counts the null entries in each column of the combined frame.
df.isnull().sum()

Year               0
Type               0
Weight_group       0
Hd_cnt             0
Jan             1546
Feb             1527
Mar             1468
Apr             1444
May             1533
Jun             1533
Jul             1685
Aug             1544
Sep             1557
Oct             1459
Nov             1553
Dec             1683
Avg                0
dtype: int64

In [6]:
# Overall sales per year: total head count summed within each Year,
# returned as a two-column frame (Year, Hd_cnt).
sales_per_yr_df = df.groupby(['Year'])['Hd_cnt'].sum().reset_index()

# Turn the Year values into datetimes by parsing them as '%Y' strings.
sales_per_yr_df = sales_per_yr_df.assign(
    Year = lambda frame: pd.to_datetime(
        frame['Year'].astype(str), format='%Y'
    )
)

In [7]:
# Bar chart: total head count per year.
# Optional renderer switch, left disabled:
# alt.renderers.enable('altair_viewer')

# Date range used as the x-axis scale domain.
x_year_domain = ['2003-01-01', '2019-01-01']

# Axis encodings, built separately so the chart chain below stays short.
sales_x = alt.X(
    'Year',
    axis = alt.Axis(titleFontSize = 14),
    scale = alt.Scale(domain = x_year_domain),
)
sales_y = alt.Y(
    'Hd_cnt',
    axis = alt.Axis(title = 'Head Count', titleFontSize = 14),
)
sales_title = {
    "text" : "Overall Sales Per Year",
    "subtitle" : "Beef Cattle Sales in Alabama",
}

sales_per_year = (
    alt.Chart(sales_per_yr_df)
    .mark_bar(size = 20)
    .encode(x = sales_x, y = sales_y)
    .properties(title = sales_title, width = 700)
    # Horizontal y-axis title, shifted left of the axis.
    .configure_axisY(titleAngle = 0, titleX = -100)
    .configure_axisX(titleX = 350, titleY = 30)
    .configure_title(fontSize = 18)
)
sales_per_year

In [8]:
# Average overall price per year: mean of the 'Avg' column within each Year,
# returned as a two-column frame (Year, Avg).
# NOTE(review): this is a plain mean over all rows of a year, not weighted by
# head count -- confirm that is the intended statistic.
prices_per_yr_df = df.groupby(['Year'])['Avg'].mean().reset_index()

# Turn the Year values into datetimes by parsing them as '%Y' strings.
prices_per_yr_df['Year'] = pd.to_datetime(
    prices_per_yr_df['Year'].astype(str), format='%Y'
    )
# No mid-cell `.head()` here: only a cell's last expression is rendered, so a
# bare preview in the middle of the cell shows nothing. Inspect the frame in
# its own cell if needed.

# Axis encodings; the x-axis reuses the `x_year_domain` date range defined
# with the sales-per-year chart.
prices_x = alt.X(
    'Year',
    axis = alt.Axis(titleFontSize = 14),
    scale = alt.Scale(domain = x_year_domain),
)
prices_y = alt.Y(
    'Avg',
    axis = alt.Axis(title = 'Average Price', titleFontSize = 14),
)
prices_title = {
    "text" : "Average Price Per Year",
    "subtitle" : "Beef Cattle Sales in Alabama",
}

prices_per_year = (
    alt.Chart(prices_per_yr_df)
    .mark_bar(size = 20)
    .encode(x = prices_x, y = prices_y)
    .properties(title = prices_title, width = 700)
    # Horizontal y-axis title, shifted left of the axis.
    .configure_axisY(titleAngle = 0, titleX = -100)
    .configure_axisX(titleX = 350, titleY = 30)
    .configure_title(fontSize = 18)
)
prices_per_year

In [9]:
# Average price per (Year, month).
# The twelve month columns are melted into long form ('variable' holds the
# month label, 'value' the price), rows with missing values are dropped, and
# the remaining prices are averaged within each Year/month pair.
month_columns = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]
price_per_month_df = (
    df.melt(id_vars = ['Year'], value_vars = month_columns)
    .dropna()
    .groupby(['Year', 'variable'])['value']
    .mean()
    .reset_index()
    # Turn the Year values into datetimes by parsing them as '%Y' strings.
    .assign(
        Year = lambda frame: pd.to_datetime(
            frame['Year'].astype(str), format='%Y'
        )
    )
)

In [10]:
# Preview the first rows of the per-year, per-month average price frame.
price_per_month_df.head()

Unnamed: 0,Year,variable,value
0,2004-01-01,Apr,98.259673
1,2004-01-01,Aug,111.039098
2,2004-01-01,Dec,105.845301
3,2004-01-01,Feb,176.858529
4,2004-01-01,Jan,125.400099
