In [1]:
import pandas as pd
import numpy as np
import os
import operator as op
file_num = 2

## Shared configuration

# Root folder; the load cell reads every CSV from "<folder_dir><ngo>/".
folder_dir = "../../../../Data/Canonical/"

# Canonical categories that check_fresh() treats as fresh food.
# NOTE(review): "Vegitable" looks like a misspelling, but the string is compared
# against data values -- confirm the spelling used in the map files before changing it.
fresh_food = [
    "Vegitable",
    "Leafy Veg",
    "Ground Veg",
    "Soy Products",
    "Fruit",
    "Bread",
    "Meat",
    "Seafood",
    "Cooked Food",
    "Fresh Other",
]

# Presumably the packaged-food counterpart of fresh_food; it is not referenced
# in this part of the notebook.
package_food = [
    "Staple",
    "Frozen",
    "Condiment",
    "Drinks",
    "Milk Powder",
    "Packaged Other",
]

def check_fresh(element):
    """Return True when the row's ``canonical`` value is listed in ``fresh_food``."""
    canonical_category = element.canonical
    return canonical_category in fresh_food

def getYear(element):
    """Return the ``year`` attribute of ``element.datetime``."""
    timestamp = element.datetime
    return timestamp.year

def getMonth(element):
    """Return the ``month`` attribute of ``element.datetime``."""
    timestamp = element.datetime
    return timestamp.month

def getDay(element):
    """Return the ``day`` attribute of ``element.datetime``."""
    timestamp = element.datetime
    return timestamp.day

def find(the_series, the_value):
    """Return the positions where ``the_value`` occurs in ``the_series``, as one string.

    The zero-based position of every item equal to ``the_value`` is converted
    to text and the pieces are concatenated without a separator. The result is
    '' when nothing matches.
    """
    matching_positions = []
    for position, item in enumerate(the_series):
        if item == the_value:
            matching_positions.append(str(position))
    return ''.join(matching_positions)

def fixAllmth(the_series):
    """Expand a month-indexed series into a 12-item list for months 1..12.

    the_series: object with a pandas-style ``index`` whose labels are month
        numbers, and label lookup via ``the_series[month]``.

    Returns a list whose item ``m - 1`` is ``the_series[m]`` when ``m`` is one
    of the index labels, and 0 otherwise.
    """
    # Test membership directly instead of joining matching positions into a
    # string and checking it with .isdigit().
    months_present = set(the_series.index.tolist())
    return [the_series[mth] if mth in months_present else 0 for mth in range(1, 13)]

def getList(the_df, target_element):
    """Return the values of the first row whose ``element`` equals ``target_element``.

    The first column is skipped, so only the columns after it are returned,
    as a NumPy array.

    Raises IndexError when no row matches.
    """
    matching_rows = the_df[the_df['element'] == target_element]
    # .iloc does the positional column slice that the removed .ix indexer used to do here.
    return matching_rows.iloc[:, 1:].values[0]

def genRow(the_name, the_series):
    """Build one row: ``the_name`` followed by the list returned by ``fixAllmth``."""
    monthly_values = fixAllmth(the_series)
    return [the_name] + monthly_values

def getMonthNum(element):
    """Return the month number (1-12) for a three-letter English abbreviation.

    Raises IndexError when ``element`` is not one of 'Jan' .. 'Dec'.
    """
    month_abbreviations = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    matches = []
    for number, abbreviation in enumerate(month_abbreviations, 1):
        if abbreviation == element:
            matches.append(number)
    return matches[0]

def getMonthDays(element, year):
    """Return the number of days in month ``element`` (1-12) of ``year``.

    February has 29 days in Gregorian leap years: years divisible by 4,
    except century years that are not divisible by 400.

    Raises IndexError when ``element`` is greater than 12.
    """
    # August has 31 days (it was listed as 30).
    month_days = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    # Use `and`, not `&`: `year % 4 == 0 & element == 2` parses as the chained
    # comparison `year % 4 == (0 & element) == 2`, which is never true.
    is_leap_year = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
    if element == 2 and is_leap_year:
        return 29
    return month_days[element - 1]

In [3]:
# Report inputs: the organisation code and the reporting year.
ngo = "PCSS"
year = 2015

In [20]:
# All files of one NGO sit in "<folder_dir><ngo>/". Yearly files are named
# "<ngo>.<year>[.<kind>].csv"; the map and donors files carry no year.
ngo_dir = folder_dir + ngo + '/'
this_year_stem = ngo + '.' + str(year)
last_year_stem = ngo + '.' + str(year - 1)

datafile_name = this_year_stem + '.csv'
mapfile_name = ngo + '.map.csv'
donorsfile_name = ngo + '.donors.csv'
distfile_name = this_year_stem + '.distribution.csv'
benffile_name = this_year_stem + '.beneficiary.csv'
procfile_name = this_year_stem + '.processing.csv'
finfile_name = this_year_stem + '.finance.csv'

# Load the data in
df = pd.read_csv(ngo_dir + datafile_name)
df_map = pd.read_csv(ngo_dir + mapfile_name)
df_donors = pd.read_csv(ngo_dir + donorsfile_name)
df_dist = pd.read_csv(ngo_dir + distfile_name)
df_benf = pd.read_csv(ngo_dir + benffile_name)
df_proc = pd.read_csv(ngo_dir + procfile_name)
df_fin = pd.read_csv(ngo_dir + finfile_name)

# When a previous-year file exists, append its rows to the current-year frame.
last_year_data_path = ngo_dir + last_year_stem + '.csv'
if os.path.isfile(last_year_data_path):
    df = pd.concat([df, pd.read_csv(last_year_data_path)])

last_year_dist_path = ngo_dir + last_year_stem + '.distribution.csv'
if os.path.isfile(last_year_dist_path):
    df_dist = pd.concat([df_dist, pd.read_csv(last_year_dist_path)])

last_year_proc_path = ngo_dir + last_year_stem + '.processing.csv'
if os.path.isfile(last_year_proc_path):
    df_proc = pd.concat([df_proc, pd.read_csv(last_year_proc_path)])

In [21]:
## Collection
# Parse the timestamps, keep the rows of the reporting year, fill missing values with 0.
df['datetime'] = pd.to_datetime(df['datetime'])
in_report_year = df.apply(getYear, axis=1) == year
df = df[in_report_year]
df = df.fillna(0)

# Every column that is not an identifier is melted into (variable, value) pairs.
melt_head = ['datetime', 'donor', 'organisation_id', 'programme']
rest_col = [column for column in list(df.columns.values) if column not in melt_head]

df = pd.melt(df, id_vars=melt_head, value_vars=rest_col)
df['datetime'] = pd.to_datetime(df['datetime'])

# Drop the records whose value is zero or missing.
has_value = (df['value'] != 0) & df['value'].notnull()
df = df[has_value]

In [22]:
# Preview the first rows of the melted (long-format) collection frame.
df.head()

Unnamed: 0,datetime,donor,organisation_id,programme,variable,value
0,2015-01-02,TKWM,PCSS,general,蔬菜,38.8
1,2015-01-03,TKWM,PCSS,general,蔬菜,21.7
2,2015-01-05,TKWM,PCSS,general,蔬菜,33.2
3,2015-01-07,TKWM,PCSS,general,蔬菜,40.8
4,2015-01-08,TKWM,PCSS,general,蔬菜,38.9


In [23]:
# Keep only this NGO's distinct (category, canonical) pairs from the map file.
df_map = df_map[df_map.organisation_id == ngo]
df_map = df_map[['category', 'canonical']]
df_map = df_map.drop_duplicates()

# Left join: every collection record is kept; records whose `variable` has no
# matching `category` get a missing `canonical`.
# NOTE(review): the df_merge.head() output saved in this notebook shows an empty
# canonical value for the first rows -- confirm the map file covers these categories.
df_merge = pd.merge(df, df_map, how='left', left_on=['variable'], right_on=['category'])
# Pass `axis` by keyword: the positional form drop(label, 1) was deprecated and
# later removed from pandas.
df_merge = df_merge.drop('category', axis=1)

In [24]:
# Preview the collection records after the canonical category has been joined in.
df_merge.head()

Unnamed: 0,datetime,donor,organisation_id,programme,variable,value,canonical
0,2015-01-02,TKWM,PCSS,general,蔬菜,38.8,
1,2015-01-03,TKWM,PCSS,general,蔬菜,21.7,
2,2015-01-05,TKWM,PCSS,general,蔬菜,33.2,
3,2015-01-07,TKWM,PCSS,general,蔬菜,40.8,
4,2015-01-08,TKWM,PCSS,general,蔬菜,38.9,


In [25]:
# Reduce the donor table to `id` plus its category, exposed as `donor_category`.
# Renaming first and selecting afterwards returns new frames, so no in-place
# rename is performed on a column slice.
df_donors = df_donors.rename(columns={'foodshare_category': 'donor_category'})
df_donors = df_donors[['id', 'donor_category']]

# Left join: records whose donor is not in the donor table get a missing donor_category.
df_merge = pd.merge(df_merge, df_donors, how='left', left_on=['donor'], right_on=['id'])
# Pass `axis` by keyword: the positional form drop(label, 1) was removed from pandas.
df_merge = df_merge.drop('id', axis=1)
# `basestring` does not exist on Python 3. NumPy maps Python types it does not
# recognise to the object dtype, so astype(object) is the portable spelling; it
# leaves missing values as NaN for the later notnull() filter.
df_merge['donor_category'] = df_merge['donor_category'].astype(object)

# Derived columns: fresh-food flag and the date split into year / month / day.
df_merge['isFresh'] = df_merge.apply(check_fresh, axis=1)
df_merge['year'] = df_merge.apply(getYear, axis=1)
df_merge['month'] = df_merge.apply(getMonth, axis=1)
df_merge['day'] = df_merge.apply(getDay, axis=1)
df_merge = df_merge[df_merge['year'] == year]

In [26]:
# Preview the reduced donor table (id and donor_category).
df_donors.head()

Unnamed: 0,id,donor_category
0,TKWM,wet market
1,KLCR,shop
2,Temple,temple
3,Banq,corporate
4,food_d,supplier


In [27]:
## Distribution
# Parse the timestamps, split them into year/month/day columns, keep the report year.
df_dist['datetime'] = pd.to_datetime(df_dist['datetime'])
for date_part, extractor in [('year', getYear), ('month', getMonth), ('day', getDay)]:
    df_dist[date_part] = df_dist.apply(extractor, axis=1)
df_dist = df_dist[df_dist['year'] == year]
## TODO: Need to check for previous/next year file

## Processing
# Parse the timestamps, split them into year/month/day columns, keep the report year.
df_proc['datetime'] = pd.to_datetime(df_proc['datetime'])
for date_part, extractor in [('year', getYear), ('month', getMonth), ('day', getDay)]:
    df_proc[date_part] = df_proc.apply(extractor, axis=1)
df_proc = df_proc[df_proc['year'] == year]


##Fin
# Relabel the finance columns by position; the assignment needs exactly three columns.
df_fin.columns = ['month', 'income', 'expenditure']
# Number the rows starting at 1, derived from the index.
# NOTE(review): this equals the calendar month only if the rows are ordered
# Jan..Dec under the default 0-based index -- confirm against the finance file.
df_fin['month_num'] = (df_fin.index + 1)

# Keep only the records that have a donor category, with the columns in a fixed order.
merge_columns = ['datetime', 'donor', 'organisation_id', 'programme', 'variable', 'value',
                 'canonical', 'donor_category', 'isFresh', 'year', 'month', 'day']
has_donor_category = df_merge['donor_category'].notnull()
df_merge = df_merge.loc[has_donor_category, merge_columns]

# Empty DF for report: an 'element' label column followed by one column per month, '1'..'12'.
# A list comprehension is used because `list + map(...)` raises TypeError on
# Python 3, where map() returns an iterator rather than a list.
columns = ['element'] + [str(month) for month in range(1, 13)]
df_report = pd.DataFrame(columns=columns)

In [28]:
# Preview the merged collection records with the donor category and date columns.
df_merge.head()

Unnamed: 0,datetime,donor,organisation_id,programme,variable,value,canonical,donor_category,isFresh,year,month,day
0,2015-01-02,TKWM,PCSS,general,蔬菜,38.8,,wet market,False,2015,1,2
1,2015-01-03,TKWM,PCSS,general,蔬菜,21.7,,wet market,False,2015,1,3
2,2015-01-05,TKWM,PCSS,general,蔬菜,33.2,,wet market,False,2015,1,5
3,2015-01-07,TKWM,PCSS,general,蔬菜,40.8,,wet market,False,2015,1,7
4,2015-01-08,TKWM,PCSS,general,蔬菜,38.9,,wet market,False,2015,1,8
