In [72]:
import pandas as pd
import numpy as np
import math

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
import plotly.figure_factory as ff

from IPython.display import display, HTML

init_notebook_mode(connected=True)

survey_v1 = pd.read_excel("Kassala_baseline_survey_for_CC_project_in_Sudan_final_results.xlsx", encoding = 'utf_8')
survey_v2 = pd.read_excel("Kassala_baseline_survey_for_CC_project_in_Sudan_final_results_v2.xlsx", encoding = 'utf_8')
reg_survey_complete = pd.concat([survey_v1, survey_v2])

# <center>Baseline survey results for the locality of Telkouk </center>
### <center>*Climate Change and Livelihoods situation of 15 villages located in Telkouk Locality, Kassala State*</center>

In [73]:
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')

#bc_wages.head()
#reg_csv.info()
#bc_wages.describe()

In [74]:
# Functions

# Get ocurrences of each value for a multiple_select field
def get_ocurrences(df, column_name):
    values_sums = df.groupby(column_name).size()
    values_names = reduce(lambda x, y: x + y,map(lambda x: x.split(), values_sums.index.values))
    result = pd.Series()
    for i in values_names:
        result[i] = values_sums.loc[[i in idx for idx in values_sums.index]].sum()
    return result

def get_percentiles_values(sorted_serie, percentiles):
    if len(percentiles) == 0:
        return []
    else:
        quantity = sorted_serie.sum() * percentiles[0]
        for i in sorted_serie.index:
            if sorted_serie[i] >= quantity:
                return [i] + get_percentiles_values(sorted_serie, percentiles[1:])
            else:
                quantity -= sorted_serie[i]

def get_percentiles(sorted_serie, values):
    if len(values) == 0:
        return []
    else:
        result = 0
        for i in sorted_serie.index:
            if values[0] == i:
                return [float(result + sorted_serie[i]) / sorted_serie.sum()] + get_percentiles(sorted_serie, values[1:])
            elif values[0] < i:
                return [float(result) / sorted_serie.sum()] + get_percentiles(sorted_serie, values[1:])
            else:
                result += sorted_serie[i]
        return [1]

In [75]:
## File preprocessing
#Deleting 'demography:', 'house_assets:', 'incomes:', 'climate_change:' and those prefix which not provide useful info.

col_names = reg_survey_complete.columns
col_names = col_names.map(lambda x: x[11:] if x.startswith("demography:") else x)
col_names = col_names.map(lambda x: x[19:] if x.startswith("responding_section:") else x)
col_names = col_names.map(lambda x: x[20:] if x.startswith("responding_section2:") else x)
col_names = col_names.map(lambda x: x[13:] if x.startswith("house_assets:") else x)
col_names = col_names.map(lambda x: x[8:] if x.startswith("incomes:") else x)
col_names = col_names.map(lambda x: x[15:] if x.startswith("climate_change:") else x)
col_names = col_names.map(lambda x: x[11:] if x.startswith("intro_data:") else x)
col_names = col_names.map(lambda x: x[:11] + x[16:] if x.startswith("respondant_page:") else x)
col_names = col_names.map(lambda x: x[16:] if x.startswith("general_hh_data:") else x)
col_names = col_names.map(lambda x: x[6:] if x.startswith("males:") else x)
col_names = col_names.map(lambda x: x[8:] if x.startswith("females:") else x)
col_names = col_names.map(lambda x: x[11:] if x.startswith("importance:") else x)
col_names = col_names.map(lambda x: x[19:] if x.startswith("percentage_incomes:") else x)
col_names = col_names.map(lambda x: x[:10] + x[16:] if x.startswith("agri_gard:agri") else x)
col_names = col_names.map(lambda x: x[:10] + x[20:] if x.startswith("livestock:livestock_") else x)
col_names = col_names.map(lambda x: x[:10] + x[21:] if x.startswith("livestock:livestock") else x)
col_names = col_names.map(lambda x: x[:8] + x[17:] if x.startswith("fishing:fishing") else x)
col_names = col_names.map(lambda x: x[:7] + x[15:] if x.startswith("labour:labour") else x)
col_names = col_names.map(lambda x: x[:7] + x[14:] if x.startswith("labour:labour") else x)
col_names = col_names.map(lambda x: x[:12] + x[25:] if x.startswith("handicrafts:handicrafts") else x)
col_names = col_names.map(lambda x: x[:9] + x[19:] if x.startswith("forestry:forestry") else x)
col_names = col_names.map(lambda x: x[:6] + x[13:] if x.startswith("other:other") else x)
col_names = col_names.map(lambda x: x[7:] if x.startswith("coping:") else x)
col_names = col_names.map(lambda x: x[4:] if x.startswith(("cc1:", "cc2:", "cc3:", "ews:")) else x)
col_names = col_names.map(lambda x: x[10:] if x.startswith("renewable:") else x)
col_names = col_names.map(lambda x: x[12:] if x.startswith("agriculture_cc_effects:") else x)
col_names = col_names.map(lambda x: x[10:] if x.startswith("livestock_cc_effects:") else x)

In [76]:
# Replace ':' with '_' for compatibility reasons.
col_names = col_names.map(lambda x: x[:14] + '_' + x[15:] if x.startswith("geocoordinates:") else x)
col_names = col_names.map(lambda x: x[:9] + '_' + x[10:] if x.startswith("agri_gard:") else x)
col_names = col_names.map(lambda x: x[:9] + '_' + x[10:] if x.startswith("livestock:") else x)
col_names = col_names.map(lambda x: x[:7] + '_' + x[8:] if x.startswith("fishing:") else x)
col_names = col_names.map(lambda x: x[:6] + '_' + x[7:] if x.startswith("labour:") else x)
col_names = col_names.map(lambda x: x[:11] + '_' + x[12:] if x.startswith("handicrafts:") else x)
col_names = col_names.map(lambda x: x[:8] + '_' + x[9:] if x.startswith("forestry:") else x)
col_names = col_names.map(lambda x: x[:5] + '_' + x[6:] if x.startswith("other:") else x)
col_names = col_names.map(lambda x: x[:10] + '_' + x[11:] if x.startswith("cc_effects:") else x)
col_names = col_names.map(lambda x: x[:4] + '_' + x[5:] if x.startswith("meta:") else x)

col_names = col_names.map(lambda x: 'livestock_lost_goat' if x == 'livestock_lost_goats' else x)

print col_names.values

reg_survey_complete.columns = col_names

[u'deviceid' u'today' u'surveyor_name' u'number' u'village'
 u'geocoordinates_Latitude' u'geocoordinates_Longitude'
 u'geocoordinates_Altitude' u'geocoordinates_Accuracy' u'responding'
 u'respondant_gender' u'respondant_age' u'respondant_marital_status'
 u'respondant_head_hh' u'education' u'handicapped_member' u'number_wifes'
 u'number_wifes_aux' u'male_infant' u'male_child' u'male_child_school'
 u'male_teenager' u'male_teenager_school' u'male_adult' u'male_elderly'
 u'female_infant' u'female_child' u'female_child_school' u'female_teenager'
 u'female_teenager_school' u'female_adult' u'female_elderly'
 u'family_males' u'family_females' u'family_total' u'family_confirm'
 u'house_material' u'belongings' u'num_beds' u'kitchen_type' u'src_incomes'
 u'imp_agri' u'imp_garden' u'imp_livestock' u'imp_fishing' u'imp_labour'
 u'imp_forestry' u'imp_handcrafts' u'imp_other' u'perc_incomes_agri'
 u'perc_incomes_garden' u'perc_incomes_livestock' u'perc_incomes_fishing'
 u'perc_incomes_labour' u'perc_

In [77]:
# Processing NaN and void values

values = {
    'respondant_marital_status': 'no_answer',
    'house_material': 'no_answer',
    'kitchen_type': 'no_answer',
    'src_incomes': 'none',
    'use_ews': 'no_answer',
    'renewable_knowledge': 'no_answer',
    'solar_energy': 'no_answer',
    'livestock_selling': 'no_answer',
    'livestock_social': 'no_answer',
    'livestock_milk': 'no_answer',
    'livestock_meat': 'no_answer',
    'livestock_savings': 'no_answer',
    'livestock_number_goat': 0,
    'livestock_number_sheep': 0,
    'livestock_number_camel': 0,
    'livestock_number_cattle': 0,
    'livestock_number_chicken': 0,
    'labour_participants': 'no_answer',
    'labour_sector': '',
    'agri_gard_water_harv_farmlands': 0,
    'agri_gard_land_quantity': 0,
    'cc_effects_sand_affected_farmlands': 0
}
reg_survey_complete.fillna(value=values, inplace=True)

In [78]:
# Creating villages dict
villages = reg_survey_complete['village'].unique()
villages_table = pd.read_excel("villages_kassala.xlsx")
# Villages dictionary
villages_dict = dict()
villages_dict['ar'] = villages_table.set_index('survey_code').to_dict()['Arabic name']
villages_dict['en'] = villages_table.set_index('survey_code').to_dict()['English transcription']
#print repr(villages_dict).decode('unicode_escape')
reg_survey_complete['village'] = reg_survey_complete.village.map(villages_dict['en'])
print villages_dict

{'ar': {1: u'\u062a\u0644\u0643\u0648\u0643  \u062a\u0645\u0627\u064a\u0627', 2: u'\u0642\u062f\u0645\u0627\u064a\u064a\u0628 \u0627\u062f\u0643\u0647\u0646\u0627\u0628 ', 3: u'\u0645\u0627\u0645\u0627\u0646 \u0643\u0627\u0645\u0631\u0642\u062f\u0627\u0628', 4: u'\u0627\u0631\u0647\u0627\u064a\u064a\u062a', 5: u'\u0642\u062f\u0627\u0645\u0627\u064a\u0628 \u0627\u0644\u0642\u062f\u064a\u0645 ', 6: u'\u0627\u0648\u0627\u064a\u064a\u0628', 7: u'\u0644\u0643\u0648\u0643 \u0627\u0644\u0627\u064a\u064a\u062a', 8: u'\u0628\u0627\u0647\u0627\u062c\u064a\u0644', 9: u'\u0642\u0627\u0628\u0631\u064a\u064a\u062a', 10: u'\u0647\u0627\u0645\u062a\u064a\u0627\u064a \u0627\u0628\u0631\u0645\u0627\u0628', 11: u'\u062e\u0627\u0634\u0645 \u062a\u0647\u062f\u0627\u0649', 12: u'\u0628\u0644\u0627\u064a\u062a', 14: u'\u062a\u0627\u0643\u0631\u0648\u0641', 15: u'\u0645\u0627\u0645\u0627\u0646 \u0628\u0644\u0645\u064a\u0643\u0644\u0627\u0628', 16: u'\u062a\u0645\u0643\u0631\u0641'}, 'en': {1: u'Talkouk Tamay 

In [79]:
# For answered surveys
reg_survey = reg_survey_complete.loc[reg_survey_complete['responding'].str.match('yes')].copy()

In [80]:
# Creating num of LH strategies
reg_survey['num_lh_strats'] = reg_survey['src_incomes'].map(lambda x: len(x.split()))

In [81]:
# Separate male and female
reg_survey_male = reg_survey.loc[reg_survey['respondant_gender'].str.match('male')].copy()
reg_survey_female = reg_survey.loc[reg_survey['respondant_gender'].str.match('female')].copy()

In [82]:
## Separate per HoHH gender

def cuantos(df):
    print df['deviceid'].count()

# Respondent is HH
reg_survey['married'] = reg_survey['respondant_marital_status'].str.match('married')
# Females HoHH
reg_survey_female_head = reg_survey[reg_survey['respondant_gender'].str.match('female') &
                                      reg_survey['respondant_head_hh'].str.match('yes')].copy()
cuantos(reg_survey_female_head)
# Females noHoHH
reg_survey_female_no_head = reg_survey[reg_survey['respondant_gender'].str.match('female') &
                                      reg_survey['respondant_head_hh'].str.match('no')].copy()

# Males HoHH + Female noHoHH married
reg_survey_male_head = reg_survey[reg_survey['respondant_gender'].str.match('male') &
                                      reg_survey['respondant_head_hh'].str.match('yes')].copy()

# 0
reg_survey_male_no_head = reg_survey[reg_survey['respondant_gender'].str.match('male') &
                                      reg_survey['respondant_head_hh'].str.match('no')].copy()

reg_survey_male_head = pd.concat([reg_survey_male_head,
                                  reg_survey_female_no_head[reg_survey_female_no_head['married']]])

# Add females nHoHH nM with male_adult to male_head
reg_survey_female_nHoHH_nM = reg_survey_female_no_head[~reg_survey_female_no_head['married']]
reg_survey_male_head = pd.concat([reg_survey_male_head,
                                  reg_survey_female_nHoHH_nM[reg_survey_female_nHoHH_nM['male_adult'] > 0]])

# Add females nHoHH nM without male_adult to female_head
reg_survey_female_head = pd.concat([reg_survey_female_head,
                                    reg_survey_female_nHoHH_nM[reg_survey_female_nHoHH_nM['male_adult'] == 0]])

# Add males nHoHH with male_adult to male_head
reg_survey_male_nHoHH = reg_survey[reg_survey['respondant_gender'].str.match('male') &
                                      reg_survey['respondant_head_hh'].str.match('no')].copy()
reg_survey_maleHoHH = pd.concat([reg_survey_male_head,
                                  reg_survey_male_nHoHH[reg_survey_male_nHoHH['male_adult'] > 0]])

# Add females nHoHH with male_adult to female_head
reg_survey_femHoHH = pd.concat([reg_survey_female_head,
                                  reg_survey_male_nHoHH[reg_survey_male_nHoHH['male_adult'] == 0]])


271


In [83]:
HoHH = {}
HoHH['Male HoHH'] = reg_survey_maleHoHH['deviceid'].count()
HoHH['Female HoHH'] = reg_survey_femHoHH['deviceid'].count()

HoHH = pd.Series(HoHH)

fig = {
    "layout": {
        "title": "Households according to the gender of the HoHH"
    },
    "data": [
        {
            "values": HoHH.values,
            "labels": HoHH.index.values,
            "type": "pie",
            "sort": False
        }
    ]
}

iplot(fig)

In [84]:
reg_survey_hand_yes = reg_survey.loc[reg_survey.handicapped_member == 'yes'].copy()
reg_survey_hand_no = reg_survey.loc[reg_survey.handicapped_member == 'no'].copy()
print reg_survey['deviceid'].count()
yes_no_number = [reg_survey_hand_yes['deviceid'].count(), reg_survey_hand_no['deviceid'].count()]
yes_no_values = ['yes', 'no']

fig = {
    "layout": {
        "title": "Households with handicapped members"
    },
    "data": [
        {
            "values": yes_no_number,
            "labels": yes_no_values,
            "type": "pie",
            "sort": False
        }
    ]
}

iplot(fig)

384


In [85]:
def create_graphs_HoHH(data_func, values_func, labels_func, sum_func, title):
    create_three_pie_graphs((reg_survey_femHoHH, reg_survey, reg_survey_maleHoHH), data_func, values_func,
                            labels_func, sum_func, title,
                            "Female Head of Household", .04,
                            "Global", .5,
                            "Male Head of Household", .94)

def create_graphs_respondent(data_func, values_func, labels_func, sum_func, title):
    create_three_pie_graphs((reg_survey_female, reg_survey, reg_survey_male), data_func, values_func,
                            labels_func, sum_func, title,
                            "Female Respondent", .07,
                            "Global", .5,
                            "Male Respondent", .91)

def create_pie_graph(serie, title):
    fig = {
        "layout": {
            "title": title
        },
        "data": [
            {
                "values": serie.values,
                "labels": serie.index.values,
                "type": "pie",
                "sort": False
            }
        ]
    }

    iplot(fig)
    
    
def create_three_pie_graphs((df1, df2, df3), data_func, values_func,
                            labels_func, sum_func, title,
                            title1, title1_x_offset,
                            title2, title2_x_offset,
                            title3, title3_x_offset):
    data_fem = data_func(df1)
    data_global = data_func(df2)
    data_male = data_func(df3)

    fig = {
        "layout": {
            "title": title,
            "annotations": [
                {
                    "font": {
                        "size": 14
                    },
                    "showarrow": False,
                    #"text": title1 + " (" + str(sum_func(data_fem)) + ")",
                    "text": title1,
                    "x": title1_x_offset,
                    "y": 1
                },
                {
                    "font": {
                        "size": 14
                    },
                    "showarrow": False,
                    #"text": title2 + " (" + str(sum_func(data_global)) + ")",
                    "text": title2,
                    "x": title2_x_offset,
                    "y": 1
                },
                {
                    "font": {
                        "size": 14
                    },
                    "showarrow": False,
                    #"text": title3 + " (" + str(sum_func(data_male)) + ")",
                    "text": title3,
                    "x": title3_x_offset,
                    "y": 1
                }
            ]
        },
        "data": [
            {
                "values": values_func(data_fem),
                "labels": labels_func(data_fem),
                "domain": {
                    "x": [0, .32]
                },
                "type": "pie",
                "sort": False
            },
            {
                "values": values_func(data_global),
                "labels": labels_func(data_global),
                "domain": {
                    "x": [.34, .64]
                },
                "type": "pie",
                "sort": False
            },
            {
                "values": values_func(data_male),
                "labels": labels_func(data_male),
                "domain": {
                    "x": [.66, .98]
                },
                "type": "pie",
                "sort": False
            }
        ]
    }

    iplot(fig)

def create_stacked_bars_graph((df1, df2), data_func, x_func, y_func, title,
                              title_df1, title_df2):
    data_global = data_func(pd.concat([df1, df2]))
    data_fem = data_func(df1)
    data_male = data_func(df2)

    fig = {
        "layout": {
            "title": title,
            "xaxis": {
                "tickangle": -45
            },
            "barmode": "stack",
            "width": 700,
            "annotations": []
        },
        "data": [
            {
                "x": x_func(data_fem),
                "y": y_func(data_fem),
                "type": "bar",
                "name": title_df1,
                "marker": {
                    "color": 'rgb(225,225,258)',
                    "line": {
                        "color": 'rgb(0,0,192)',
                        "width": 1.5
                    }
                },
            },
            {
                "x": x_func(data_male),
                "y": y_func(data_male),
                "type": "bar",
                "name": title_df2,
                "marker": {
                    "color": 'rgb(258,225,225)',
                    "line": {
                        "color": 'rgb(192,0,0)',
                        "width": 1.5
                    }
                },
            }
        
        ]
    }


#    for i in range(0, len(kitchen_type)):
#        fig["layout"]["annotations"].append(
#            {
#                 "x": map(lambda x: x.capitalize(), kitchen_perc.index.values)[i],
#                 "y": kitchen_perc.values[i] + 1.5,
#                "x": pd.concat([df1, df2])[i],
#                "y": y_func(data_global)[i] + 1.5,
#                "text": str(data_global.values[i]) + "%",
#                "font": dict(
#                    family='Arial',
#                    size=14,
#                    color='rgba(0, 0, 100, 1)'
#                ),
#                "showarrow": False,
#    
#            }
#        )
#
#
    iplot(fig)
    

def plot_wealth_criteria(groupby_data, values, title):
    groupby_data = groupby_data * 100 / groupby_data.sum()
    poor = groupby_data[groupby_data.index <= values[0]]
    medium = groupby_data[groupby_data.index > values[0]]
    medium = medium[medium.index <= values[1]]
    rich = groupby_data[groupby_data.index > values[1]]
    rich = rich[rich.index <= values[2]]
    print medium
    
    fig_distribution = {
        "layout": {
            "title": title,
            "yaxis": {
                "ticksuffix": "%"
            },
            "width": 700,
            "annotations": []
        },
        "data": [
            {
                "x": poor.index.values,
                "y": poor.values,
                "type": "bar",
                "name": "Very poor (" + str(round(poor.values.sum(), 1)) + "%)" ,
                "marker": {
                    "color": 'rgb(214,39,40)',
                }
            },
            {
                "x": medium.index.values,
                "y": medium.values,
                "type": "bar",
                "name": "Poor (" + str(round(medium.values.sum(), 1)) + "%)",
                "marker": {
                    "color": 'rgb(255,127,14)',
                }
            },
            {
                "x": rich.index.values,
                "y": rich.values,
                "type": "bar",
                "name": "Medium (" + str(round(rich.values.sum(), 1)) + "%)",
                "marker": {
                    "color": 'rgb(44,160,44)',
                }
            }
        ]
    }
    
    iplot(fig_distribution)


## General data

Number of Surveys:

In [86]:
print "Total:" + str(reg_survey_complete['deviceid'].count())
print ""
total_surveys = reg_survey['deviceid'].count()
print "Responding:" + str(total_surveys)
print ""

Total:399

Responding:384



### Respondents per gender and village

In [87]:
surveys_village = reg_survey.groupby('village').size()
surveys_village_male = reg_survey_male.groupby('village').size()
surveys_village_female = reg_survey_female.groupby('village').size()
respondents_gender = pd.DataFrame(surveys_village, columns= ['Total'])
respondents_gender['Male'] = surveys_village_male
respondents_gender['Male %'] = (surveys_village_male*100 / surveys_village).round(2)
respondents_gender['Female'] = surveys_village_female
respondents_gender['Female %'] = (surveys_village_female*100 / surveys_village).round(2)
respondents_gender = respondents_gender.reindex(['Male', 'Male %', 'Female', 'Female %', 'Total'], axis=1)
respondents_gender.loc['Total', 'Total'] = respondents_gender['Total'].sum()
respondents_gender.loc['Total', 'Male'] = respondents_gender['Male'].sum()
respondents_gender.loc['Total', 'Female'] = respondents_gender['Female'].sum()
respondents_gender.fillna({'Male': 0, 'Female': 0}, inplace=True)
respondents_gender.loc['Total', 'Male %'] = round(100*respondents_gender.loc['Total', 'Male'] / respondents_gender.loc['Total', 'Total'], 2)
respondents_gender.loc['Total', 'Female %'] = round(100*respondents_gender.loc['Total', 'Female'] / respondents_gender.loc['Total', 'Total'], 2)
respondents_gender['Male'] = respondents_gender['Male'].astype(int)
respondents_gender['Female'] = respondents_gender['Female'].astype(int)
respondents_gender['Total'] = respondents_gender['Total'].astype(int)

display(respondents_gender)

Unnamed: 0_level_0,Male,Male %,Female,Female %,Total
village,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bahajeil,0,,22,100.0,22
Eirhagayeet,4,16.67,20,83.33,24
Gabrayeet,0,,11,100.0,11
Gadamayeeb Adkihinab,0,,22,100.0,22
Gadamayeeb Algadima,0,,35,100.0,35
Hamtiyay Abrhemab,0,,1,100.0,1
Khashum Tahday,2,7.41,25,92.59,27
Maman Kameragadap,3,8.33,33,91.67,36
Maman belimiklab,0,,13,100.0,13
Pelait,0,,28,100.0,28


### HHs Composition

In [88]:
surveys_village_maleHoHH = reg_survey_maleHoHH.groupby('village').size()
surveys_village_femHoHH = reg_survey_femHoHH.groupby('village').size()
HoHH_gender = pd.DataFrame(surveys_village, columns= ['Total'])
HoHH_gender['Male'] = surveys_village_maleHoHH
HoHH_gender['Male %'] = (surveys_village_maleHoHH*100 / surveys_village).round(2)
HoHH_gender['Female'] = surveys_village_femHoHH
HoHH_gender['Female %'] = (surveys_village_femHoHH*100 / surveys_village).round(2)
HoHH_gender = HoHH_gender.reindex(['Male', 'Male %', 'Female', 'Female %', 'Total'], axis=1)
HoHH_gender.loc['Total', 'Total'] = HoHH_gender['Total'].sum()
HoHH_gender.loc['Total', 'Male'] = HoHH_gender['Male'].sum()
HoHH_gender.loc['Total', 'Male %'] = round(100*HoHH_gender.loc['Total', 'Male'] / HoHH_gender.loc['Total', 'Total'], 2)
HoHH_gender.loc['Total', 'Female'] = HoHH_gender['Female'].sum()
HoHH_gender.loc['Total', 'Female %'] = round(100*HoHH_gender.loc['Total', 'Female'] / HoHH_gender.loc['Total', 'Total'], 2)
HoHH_gender.fillna({'Male': 0, 'Female': 0}, inplace=True)
HoHH_gender['Male'] = HoHH_gender['Male'].astype(int)
HoHH_gender['Female'] = HoHH_gender['Female'].astype(int)
HoHH_gender['Total'] = HoHH_gender['Total'].astype(int)

display(HoHH_gender)

Unnamed: 0_level_0,Male,Male %,Female,Female %,Total
village,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bahajeil,8,36.36,14,63.64,22
Eirhagayeet,8,33.33,16,66.67,24
Gabrayeet,2,18.18,9,81.82,11
Gadamayeeb Adkihinab,7,31.82,15,68.18,22
Gadamayeeb Algadima,9,25.71,26,74.29,35
Hamtiyay Abrhemab,0,,1,100.0,1
Khashum Tahday,10,37.04,17,62.96,27
Maman Kameragadap,10,27.78,26,72.22,36
Maman belimiklab,2,15.38,11,84.62,13
Pelait,6,21.43,22,78.57,28


In [89]:
print reg_survey_femHoHH['deviceid'].count()
print reg_survey_maleHoHH['deviceid'].count()

273
111


In [90]:
create_graphs_HoHH(lambda x: x.groupby('education').size(),
                 lambda x: x.values,
                 lambda x: x.index.values,
                   lambda x: x.sum(),
                 "Education level of the head of household")


In [91]:
#hey = reg_survey.groupby(['village', 'house_material']).size()

#house_materials_df = pd.DataFrame(index=house_materials_index, columns=villages, data=[house_materials_general])
#print house_materials_df

create_graphs_HoHH(lambda x: pd.crosstab(x.village, x.house_material),
                 lambda x: x.sum().values,
                 lambda x: x.columns.values,
                   lambda x: x.sum().sum(),
                 "House materials")

create_pie_graph(reg_survey.groupby('house_material').size(), "House materials")

fig_house_materials2 = {
    "layout": {
        "title": {
            "font": {
                    "size": 20
            },
            "text": "House materials"
        }
    },
    "data": []
}

house_materials_df = pd.crosstab(reg_survey.village, reg_survey.house_material)
#print range(len(villages_dict['en'].keys())-1)
#print house_materials_df
#for i in range(len(villages_dict['en'].keys())):
#    x = .2*(i%5)
#    y = 0.35*(i/5)
#    fig_house_materials2['data'].append(
#        {
#
#            "values": house_materials_df.iloc[i-1].values,
#            "labels": house_materials_df.columns.values,
#             "domain": {
#                "x": [x, x+.18],
#                 "y": [y, y+.3]
#            },
#            "type": "pie",
#            "sort": False
#        })
#
#iplot(fig_house_materials2)

###  Mean Household size

In [92]:
mean_hh_size = reg_survey.family_total.mean()
print str(mean_hh_size) + " persons per household"

5.7890625 persons per household


## Climate change awareness

In [93]:
create_graphs_respondent(lambda x: x.groupby('cc_knowledge').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Households declaring CC knowledge")

In [94]:
create_graphs_respondent(lambda x: x.groupby('taking_action_cc').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Households declaring taking actions against CC")

#### EWS awareness

In [95]:
create_graphs_respondent(lambda x: x.groupby('ews_knowledge').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Households declaring EWS knowledge")

In [96]:
create_graphs_respondent(lambda x: x.groupby('use_ews').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Households using EWS information")

#### Renewable energy awareness

In [97]:
create_graphs_respondent(lambda x: x.groupby('renewable_knowledge').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Households declaring renewable energies knowledge")

In [98]:
create_graphs_respondent(lambda x: x.groupby('solar_energy').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Households using solar energies")

#### Cooking facilities

In [99]:
kitchen_type = reg_survey.groupby('kitchen_type').size()
kitchen_type_male = get_ocurrences(reg_survey_maleHoHH, 'kitchen_type').sort_values(ascending = False)
kitchen_type_female = get_ocurrences(reg_survey_femHoHH, 'kitchen_type').sort_values(ascending = False)
kitchen = pd.crosstab(reg_survey.village, reg_survey.kitchen_type)
#display(kitchen)

kitchen_perc_male = 100*kitchen_type_male/total_surveys
kitchen_perc_female = 100*kitchen_type_female/total_surveys
kitchen_perc = 100*kitchen_type/total_surveys
kitchen_perc = kitchen_perc.round(2)
#print kitchen_perc

create_stacked_bars_graph((reg_survey_femHoHH, reg_survey_maleHoHH),
                              lambda x: 100 * get_ocurrences(x, 'kitchen_type').sort_values(ascending = False) / total_surveys,
                              lambda x: map(lambda y: y.capitalize(), x.index.values),
                              lambda x: x.values,
                              "Cooking techniques",
                              "Female Head of Household",
                              "Male Head of Household")

## Livelihoods

Number of households practicing each livelihood

In [100]:
#reg_survey.loc[reg_survey['src_incomes'].str.contains("agriculture"), 'agriculture'] = 'yes'
#reg_survey.loc[reg_survey['src_incomes'].str.contains("livestock"), 'livestock'] = 'yes'
#reg_survey.loc[reg_survey['src_incomes'].str.contains("labour"), 'labour'] = 'yes'
#importance = reg_survey.groupby(['agriculture', 'livestock', 'labour'])['perc_incomes_agri'].agg('mean')

#agric = reg_survey[reg_survey['src_incomes'].str.contains("agriculture")]['deviceid'].count()

src_incomes_sums = reg_survey.groupby('src_incomes').size()
lh_values = get_ocurrences(reg_survey, 'src_incomes')
lh_types = pd.DataFrame({"Households": lh_values})
#lh_types["Percentage"] = lh_types.Households.map(lambda x: str(round(100*x/total_surveys))) + " %"

print ""
print lh_values.sort_values(ascending = False)


livestock      234
forestry       130
other           43
handicrafts     30
labour          19
agriculture     19
dtype: int64


In [101]:
lh_perc = 100*lh_values/total_surveys
lh_perc = lh_perc.round(2)

create_stacked_bars_graph((reg_survey_femHoHH, reg_survey_maleHoHH),
                              lambda x: 100 * get_ocurrences(x, 'src_incomes') / total_surveys,
                              lambda x: map(lambda y: y.capitalize(), x.index.values),
                              lambda x: x.values,
                              "Livelihoods Strategies",
                              "Female Head of Household",
                              "Male Head of Household")

create_stacked_bars_graph((reg_survey_female, reg_survey_male),
                              lambda x: 100 * get_ocurrences(x, 'src_incomes') / total_surveys,
                              lambda x: map(lambda y: y.capitalize(), x.index.values),
                              lambda x: x.values,
                              "Livelihoods Strategies",
                              "Female respondent",
                              "Male respondent")

livelihoods_strategies = {
    "layout": {
        "title": "Livelihoods Strategies",
        "xaxis": {
            "tickangle": -45
        },
        "width": 700,
        "annotations": []
    },
    "data": [
        {
            "x": map(lambda x: x.capitalize(), lh_perc.index.values),
            "y": lh_perc.values,
            "type": "bar",
            "name": "Livelihoods Strategies",
            "marker": {
                "color": 'rgb(258,225,225)',
                "line": {
                    "color": 'rgb(192,0,0)',
                    "width": 1.5
                }
            },
        }
    ]
}


for i in range(0, len(lh_values)):
    livelihoods_strategies["layout"]["annotations"].append(
        {
            "x": map(lambda x: x.capitalize(), lh_perc.index.values)[i],
            "y": lh_perc.values[i] + 1.5,
            "text": str(lh_perc.values[i]) + "%",
            "font": dict(
                family='Arial',
                size=14,
                color='rgba(100, 0, 0, 1)'
            ),
            "showarrow": False,

        }
    )

iplot(livelihoods_strategies)

In [102]:
lh_values_hand = get_ocurrences(reg_survey_hand_yes, 'src_incomes')
lh_perc_hand = 100*lh_values_hand/reg_survey_hand_yes['deviceid'].count()
lh_perc_hand = lh_perc_hand.round(2)

livelihoods_strategies_hand = {
    "layout": {
        "title": "Livelihoods Strategies in HH with dependent members",
        "xaxis": {
            "tickangle": -45
        },
        "width": 700,
        "annotations": []
    },
    "data": [
        {
            "x": map(lambda x: x.capitalize(), lh_perc_hand.index.values),
            "y": lh_perc_hand.values,
            "type": "bar",
            "name": "Livelihoods Strategies",
            "marker": {
                "color": 'rgb(258,225,225)',
                "line": {
                    "color": 'rgb(192,0,0)',
                    "width": 1.5
                }
            },
        }
    ]
}


for i in range(0, len(lh_values_hand)):
    livelihoods_strategies_hand["layout"]["annotations"].append(
        {
            "x": map(lambda x: x.capitalize(), lh_perc_hand.index.values)[i],
            "y": lh_perc_hand.values[i] + 1.2,
            "text": str(lh_perc_hand.values[i]) + "%",
            "font": dict(
                family='Arial',
                size=14,
                color='rgba(100, 0, 0, 1)'
            ),
            "showarrow": False,

        }
    )

iplot(livelihoods_strategies_hand)

Main livelihoods per household

In [103]:
print src_incomes_sums.sort_values(ascending = False)
reg_survey['num_lh_strats'] = reg_survey['src_incomes'].map(lambda x: len(x.split()))
create_graphs_HoHH(lambda x: x.groupby('num_lh_strats').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Number of livelihoods Strategies")
create_graphs_respondent(lambda x: x.groupby('num_lh_strats').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Number of livelihoods Strategies")

src_incomes
livestock                         162
forestry                           90
livestock forestry                 25
other                              24
livestock handicrafts              18
labour                             16
livestock other                    11
agriculture                         9
agriculture livestock               6
livestock forestry handicrafts      5
forestry handicrafts                3
livestock forestry other            2
handicrafts                         2
forestry other                      2
livestock labour                    2
labour forestry                     1
livestock handicrafts other         1
agriculture other                   1
agriculture livestock other         1
agriculture livestock forestry      1
agriculture forestry                1
handicrafts other                   1
dtype: int64


In [104]:
reg_survey_hand_yes['num_lh_strats'] = reg_survey_hand_yes['src_incomes'].map(lambda x: len(x.split()))
create_pie_graph(reg_survey_hand_yes.groupby('num_lh_strats').size(), "Number livelihoods strategies in HH with dependent members")

- 277 out of 399 hh, i.e. 69.42%, lives only on livestock, forestry or both.

### Livestock

Number of interviewed households owning each type of livestock

In [105]:
livestock_set = reg_survey.loc[reg_survey['src_incomes'].str.contains("livestock")]
livestock_set_female = reg_survey_female.loc[reg_survey_female['src_incomes'].str.contains("livestock")]
livestock_set_femHoHH = reg_survey_femHoHH.loc[reg_survey_femHoHH['src_incomes'].str.contains("livestock")]

participants = livestock_set.groupby('livestock_participants').size()
participants_goats = livestock_set.loc[livestock_set['livestock_type'].str.contains("goat")].groupby('livestock_participants').size()
#print participants_goats

lvstk_types = ['camel', 'cattle', 'goat', 'sheep', 'chicken']

livestock_women = livestock_set.loc[livestock_set['livestock_participants'] == "women"]
type_women =  livestock_women.groupby('livestock_type').size()
#livestock_set = livestock_set.copy()
for i in lvstk_types:
    livestock_set[i + '_reduced'] = livestock_set['livestock_lost_' + i] / (livestock_set['livestock_number_' + i] + livestock_set['livestock_number_' + i])
#print ("Types for women")
#print(type_women)
#print livestock_set.groupby('camel_reduced').size()
goats = livestock_set['livestock_number_goat'].dropna()
#goats_female = livestock_set_female['livestock_number_goat'].dropna()
goats_femHoHH = livestock_set_femHoHH['livestock_number_goat'].dropna()

sheeps = livestock_set['livestock_number_sheep'].dropna()
#sheeps_female = livestock_set_female['livestock_number_sheep'].dropna()
sheeps_femHoHH = livestock_set_femHoHH['livestock_number_sheep'].dropna()

cattle = livestock_set['livestock_number_cattle'].dropna()
#cattle_female = livestock_set_female['livestock_number_cattle'].dropna()
cattle_femHoHH = livestock_set_femHoHH['livestock_number_cattle'].dropna()

chickens = livestock_set['livestock_number_chicken'].dropna()
#chickens_female = livestock_set_female['livestock_number_chicken'].dropna()
chickens_femHoHH = livestock_set_femHoHH['livestock_number_chicken'].dropna()

camels = livestock_set['livestock_number_camel'].dropna()

livestock_set.groupby('livestock_training').size()


livestock_training
no     103
yes    131
dtype: int64

In [106]:
livestock_set['livestock_number_gosh'] = livestock_set['livestock_number_goat'] + livestock_set['livestock_number_sheep']

print livestock_set['livestock_number_gosh'].sum()
print livestock_set['livestock_number_goat'].mean()
print livestock_set['livestock_number_sheep'].mean()

types_lvstck = get_ocurrences(livestock_set, 'livestock_type')

def score_gs(gs):
    if (gs<= 5):
        result = 0
    elif (gs <= 20):
        result = 1
    elif (gs <= 50):
        result = 2
    elif (gs <= 150):
        result = 3
    else:
        result = 5
    return result

def score_camel(gs):
    if (gs<= 0):
        result = 0
    elif (gs <= 3):
        result = 1
    elif (gs <= 6):
        result = 2
    elif (gs <= 15):
        result = 3
    else:
        result = 5
    return result

def score_cattle(gs):
    if (gs<= 0):
        result = 0
    elif (gs <= 2):
        result = 1
    elif (gs <= 5):
        result = 2
    elif (gs <= 15):
        result = 3
    else:
        result = 5
    return result

def score_wealth_lvstck(df):
    sum_gs = df['livestock_number_gosh'].map(score_gs)
    sum_camel = df['livestock_number_camel'].map(score_camel)
    sum_cattle = df['livestock_number_cattle'].map(score_cattle)
    df['wealth_group_score'] = sum_gs + sum_camel + sum_cattle

score_wealth_lvstck(livestock_set)

4227.0
12.6709401709
5.39316239316


In [107]:
perc_lvstck = 100*types_lvstck/livestock_set['deviceid'].count()
perc_lvstck = perc_lvstck.round(2)
fig_types_lvstck = {
    "layout": {
        "title": "Livestock types",
        "xaxis": {
            "tickangle": -45
        },
        "width": 700,
        "annotations": []
    },
    "data": [
        {
            "x": map(lambda x: x.capitalize(), perc_lvstck.index.values),
            "y": perc_lvstck.values,
            "type": "bar",
            "name": "Livelihoods Strategies",
            "marker": {
                "color": 'rgb(258,225,225)',
                "line": {
                    "color": 'rgb(192,0,0)',
                    "width": 1.5
                }
            }
        }
    ]
}


for i in range(0, len(types_lvstck)):
    fig_types_lvstck["layout"]["annotations"].append(
        {
            "x": map(lambda x: x.capitalize(), perc_lvstck.index.values)[i],
            "y": perc_lvstck.values[i] + 1.5,
            "text": str(perc_lvstck.values[i]) + "%",
            "font": dict(
                family='Arial',
                size=14,
                color='rgba(100, 0, 0, 1)'
            ),
            "showarrow": False,

        }
    )

iplot(fig_types_lvstck)


In [108]:
prueba = pd.crosstab(livestock_set.livestock_number_goat, livestock_set.livestock_number_sheep).stack().reset_index(name='number')
#prueba.plot.scatter('livestock_number_goats', 'livestock_number_sheep', s=prueba.number * 10)
prueba = prueba[prueba.number != 0]
prueba = prueba[prueba.livestock_number_goat <= 100]
prueba = prueba[prueba.livestock_number_sheep <= 100]
prueba = prueba.drop(0)
#display(prueba)
goats_vs_sheeps = {
    "layout": {
        "title": "Relation between the number of goats and sheeps",
        "xaxis": {
            "title": "Number of goats"
        },
        "yaxis": {
            "title": "Number of sheeps"
        }
    },
    "data": [
        {

            "x": prueba.livestock_number_goat,
            "y": prueba.livestock_number_sheep,
            "mode": "markers",
            "marker": {
                "size": prueba.number * 5               
            },
            "type": "scatter"
        }
    ]
}
iplot(goats_vs_sheeps)

In [109]:
prueba2 = pd.crosstab(livestock_set.livestock_number_cattle, livestock_set.livestock_number_sheep).stack().reset_index(name='number')
#prueba.plot.scatter('livestock_number_goats', 'livestock_number_sheep', s=prueba.number * 10)
prueba2 = prueba2[prueba2.number != 0]
prueba2= prueba2.drop(0)
prueba2 = prueba2[prueba2.livestock_number_cattle <= 100]
prueba2 = prueba2[prueba2.livestock_number_sheep <= 100]
#display(prueba)
cattle_vs_sheeps = {
    "layout": {
        "title": "Relation between the number of cattle and sheeps",
        "xaxis": {
            "title": "Number of cattle"
        },
        "yaxis": {
            "title": "Number of sheeps"
        }
    },
    "data": [
        {

            "x": prueba2.livestock_number_cattle,
            "y": prueba2.livestock_number_sheep,
            "mode": "markers",
            "marker": {
                "size": prueba2.number * 5               
            },
            "type": "scatter"
        }
    ]
}
iplot(cattle_vs_sheeps)

In [110]:
prueba3 = pd.crosstab(livestock_set.livestock_number_goat, livestock_set.livestock_number_cattle).stack().reset_index(name='number')
#prueba.plot.scatter('livestock_number_goats', 'livestock_number_sheep', s=prueba.number * 10)
prueba3 = prueba3[prueba3.number != 0]
prueba3= prueba3.drop(0)
prueba3 = prueba3[prueba3.livestock_number_goat <= 100]
prueba3 = prueba3[prueba3.livestock_number_cattle <= 100]
#display(prueba)
goat_vs_cattle = {
    "layout": {
        "title": "Relation between the number of cattle and goats",
        "xaxis": {
            "title": "Number of goat"
        },
        "yaxis": {
            "title": "Number of cattle"
        }
    },
    "data": [
        {

            "x": prueba3.livestock_number_goat,
            "y": prueba3.livestock_number_cattle,
            "mode": "markers",
            "marker": {
                "size": prueba3.number * 5               
            },
            "type": "scatter"
        }
    ]
}
iplot(goat_vs_cattle)

In [111]:
prueba4 = pd.crosstab(livestock_set.livestock_number_goat, livestock_set.livestock_number_camel).stack().reset_index(name='number')
#prueba.plot.scatter('livestock_number_goats', 'livestock_number_sheep', s=prueba.number * 10)
prueba4 = prueba4[prueba4.number != 0]
prueba4= prueba4.drop(0)
prueba4 = prueba4[prueba4.livestock_number_goat <= 100]
prueba4 = prueba4[prueba4.livestock_number_camel <= 100]
#display(prueba)
goat_vs_camel = {
    "layout": {
        "title": "Relation between the number of camel and sheeps",
        "xaxis": {
            "title": "Number of goat"
        },
        "yaxis": {
            "title": "Number of camel"
        }
    },
    "data": [
        {

            "x": prueba4.livestock_number_goat,
            "y": prueba4.livestock_number_camel,
            "mode": "markers",
            "marker": {
                "size": prueba4.number * 5               
            },
            "type": "scatter"
        }
    ]
}
iplot(goat_vs_camel)

In [112]:
def getColor5(value):
    if (value <=0):
        return "red"
    elif (value <= 3):
        return "orange"
    else:
        return "green"

def getWealth5(df):
    df['wealth'] = df['livestock_number_gosh'].map(score_gs) + df['livestock_number_camel'].map(score_camel)
    return df['wealth']
    
prueba5 = pd.crosstab(livestock_set.livestock_number_gosh, livestock_set.livestock_number_camel).stack().reset_index(name='number')
#prueba.plot.scatter('livestock_number_goats', 'livestock_number_sheep', s=prueba.number * 10)
prueba5 = prueba5[prueba5.number != 0]
prueba5= prueba5.drop(0)
#prueba5 = prueba5[prueba5.livestock_number_gosh <= 100]
#prueba5 = prueba5[prueba5.livestock_number_camel <= 100]
#display(prueba)
gosh_vs_camel = {
    "layout": {
        "title": "Relation between the number of camel and sheeps",
        "xaxis": {
            "title": "Number of goat"
        },
        "yaxis": {
            "title": "Number of camel"
        }
    },
    "data": [
        {

            "x": prueba5.livestock_number_gosh,
            "y": prueba5.livestock_number_camel,
            "mode": "markers",
            "marker": {
                "size": prueba5.number * 5, 
                "color": getWealth5(prueba5).map(getColor5)
            },
            "type": "scatter"
        }
    ]
}
iplot(gosh_vs_camel)

In [113]:
def getColor6(value):
    if (value <=0):
        return "red"
    elif (value <= 3):
        return "orange"
    else:
        return "green"

def getWealth6(df):
    df['wealth'] = df['livestock_number_gosh'].map(score_gs) + df['livestock_number_cattle'].map(score_cattle)
    return df['wealth']
    
prueba6 = pd.crosstab(livestock_set.livestock_number_gosh, livestock_set.livestock_number_cattle).stack().reset_index(name='number')
#prueba.plot.scatter('livestock_number_goats', 'livestock_number_sheep', s=prueba.number * 10)
prueba6 = prueba6[prueba6.number != 0]
prueba6= prueba6.drop(0)
#prueba5 = prueba5[prueba5.livestock_number_gosh <= 100]
#prueba5 = prueba5[prueba5.livestock_number_camel <= 100]
#display(prueba)
gosh_vs_cattle = {
    "layout": {
        "title": "Relation between the number of camel and sheeps",
        "xaxis": {
            "title": "Number of goat"
        },
        "yaxis": {
            "title": "Number of camel"
        }
    },
    "data": [
        {

            "x": prueba6.livestock_number_gosh,
            "y": prueba6.livestock_number_cattle,
            "mode": "markers",
            "marker": {
                "size": prueba6.number * 5,
                "color": getWealth6(prueba6).map(getColor6)
            },
            "type": "scatter"
        }
    ]
}
iplot(gosh_vs_cattle)

With a few exceptions, the number of goats per household is proportional to the number of sheeps. So to define wealth groups for livestock tenants the number of goats will be the main variable.

In [114]:
goats = goats.value_counts().sort_index()
goats_femHoHH = goats_femHoHH.value_counts().sort_index()
sheeps = sheeps.value_counts().sort_index()
sheeps_femHoHH = sheeps_femHoHH.value_counts().sort_index()
cattle = cattle.value_counts().sort_index()
cattle_femHoHH = cattle_femHoHH.value_counts().sort_index()

In [115]:
values = [5,10, 40]
print values
values_femHoHH = [5,10, 40]
print values_femHoHH
per2 = get_percentiles(goats, values)
percentiles = pd.Series(values, index = per2)

print percentiles

print pd.Series(values_femHoHH, index = get_percentiles(goats_femHoHH, values_femHoHH))

plot_wealth_criteria(goats, values, "Number of goats per wealth group")
#plot_wealth_criteria(land_quantity_female, values_female, "Number of feddans per wealth group in female respondents")
plot_wealth_criteria(goats_femHoHH, values_femHoHH, "Number of goats per wealth group in female HoHH")

[5, 10, 40]
[5, 10, 40]
0.226496     5
0.551282    10
0.978632    40
dtype: int64
0.203704     5
0.537037    10
0.975309    40
dtype: int64
6.0      7.264957
7.0      3.846154
8.0      4.273504
9.0      4.273504
10.0    12.820513
Name: livestock_number_goat, dtype: float64


6.0      8.641975
7.0      3.086420
8.0      3.086420
9.0      5.555556
10.0    12.962963
Name: livestock_number_goat, dtype: float64


In [116]:
values = [0,5, 20]
print values
values_femHoHH = [0,5, 20]
print values_femHoHH
per2 = get_percentiles(sheeps, values)
percentiles = pd.Series(values, index = per2)

print percentiles

print pd.Series(values_femHoHH, index = get_percentiles(sheeps_femHoHH, values_femHoHH))

plot_wealth_criteria(sheeps, values, "Number of sheeps per wealth group")
#plot_wealth_criteria(land_quantity_female, values_female, "Number of feddans per wealth group in female respondents")
plot_wealth_criteria(sheeps_femHoHH, values_femHoHH, "Number of sheeps per wealth group in female HoHH")

[0, 5, 20]
[0, 5, 20]
0.594017     0
0.683761     5
0.957265    20
dtype: int64
0.574074     0
0.666667     5
0.975309    20
dtype: int64
3.0    2.564103
4.0    2.564103
5.0    3.846154
Name: livestock_number_sheep, dtype: float64


3.0    2.469136
4.0    3.703704
5.0    3.086420
Name: livestock_number_sheep, dtype: float64


#### Quantiles for goats
- Poors: 35% of households have less than 10 goats and less than 10 sheeps
- Medium: 62% of households have between 10 and 35 goats and between 20 and 30 sheeps
- Richs: 3% of households have more than 35 goats and/or more than 30 sheeps

In [117]:
livestock_uses = ["selling", "social", "milk", "meat", "savings"]
goats_uses = ["selling", "social", "milk", "meat", "savings"]
sheeps_uses = ["selling", "social", "milk", "meat", "savings"]
livestock_uses_df = livestock_set[map(lambda x: "livestock_" + x, livestock_uses)]
livestock_uses_df.fillna('no')
livestock_uses = livestock_uses_df.apply(pd.Series.value_counts).fillna('0').loc['yes']
num_lvstck = livestock_set['deviceid'].count()
livestock_uses_perc = livestock_uses * 100 / num_lvstck

fig_uses_lvstck = {
    "layout": {
        "title": "Livestock uses",
        "xaxis": {
            "tickangle": -45
        },
        "yaxis": {
            "ticksuffix": "%"
        },
        "width": 700,
        "annotations": []
    },
    "data": [
        {
            "x": map(lambda x: x.capitalize(), livestock_uses_perc.index.values),
            "y": livestock_uses_perc.values,
            "type": "bar",
            "name": "Live uses",
            "marker": {
                "color": 'rgb(258,225,225)',
                "line": {
                    "color": 'rgb(192,0,0)',
                    "width": 1.5
                }
            }
        }
    ]
}


iplot(fig_uses_lvstck)

In [118]:
print "USES TOTAL LIVESTOCK"
print livestock_uses
print livestock_uses_perc
goats_uses = livestock_set.loc[livestock_set['livestock_type'].str.contains("goat")][map(lambda x: "livestock_" + x, goats_uses)]
print ""
print "USES TOTAL GOATS"
print goats_uses.apply(pd.Series.value_counts)
sheeps_uses = livestock_set.loc[livestock_set['livestock_type'].str.contains("sheep")][map(lambda x: "livestock_" + x, sheeps_uses)]
print ""
print "USES TOTAL SHEEPS"
print sheeps_uses.apply(pd.Series.value_counts)

USES TOTAL LIVESTOCK
livestock_selling    155
livestock_social     156
livestock_milk       227
livestock_meat       170
livestock_savings    195
Name: yes, dtype: object
livestock_selling    66.2393
livestock_social     66.6667
livestock_milk       97.0085
livestock_meat       72.6496
livestock_savings    83.3333
Name: yes, dtype: object

USES TOTAL GOATS
           livestock_selling  livestock_social  livestock_milk  \
no                      74.0                72               3   
no_answer                NaN                 2               3   
yes                    144.0               144             212   

           livestock_meat  livestock_savings  
no                     54                 30  
no_answer               3                  7  
yes                   161                181  

USES TOTAL SHEEPS
           livestock_selling  livestock_social  livestock_milk  \
no                      17.0              30.0               1   
no_answer                NaN         

In [119]:
uses_lvstck = get_ocurrences(livestock_set, 'livestock_type')

perc_lvstck = 100*types_lvstck/livestock_set['deviceid'].count()
perc_lvstck = perc_lvstck.round(2)
fig_types_lvstck = {
    "layout": {
        "title": "Livestock types",
        "xaxis": {
            "tickangle": -45
        },
        "width": 700,
        "annotations": []
    },
    "data": [
        {
            "x": map(lambda x: x.capitalize(), perc_lvstck.index.values),
            "y": perc_lvstck.values,
            "type": "bar",
            "name": "Livelihoods Strategies",
            "marker": {
                "color": 'rgb(258,225,225)',
                "line": {
                    "color": 'rgb(192,0,0)',
                    "width": 1.5
                }
            }
        }
    ]
}


for i in range(0, len(types_lvstck)):
    fig_types_lvstck["layout"]["annotations"].append(
        {
            "x": map(lambda x: x.capitalize(), perc_lvstck.index.values)[i],
            "y": perc_lvstck.values[i] + 1.5,
            "text": str(perc_lvstck.values[i]) + "%",
            "font": dict(
                family='Arial',
                size=14,
                color='rgba(100, 0, 0, 1)'
            ),
            "showarrow": False,

        }
    )

iplot(fig_types_lvstck)

### Laborers

In [120]:
labour_set = reg_survey.loc[reg_survey['src_incomes'].str.contains("labour")]
labour_set_femHoHH = reg_survey_femHoHH.loc[reg_survey_femHoHH['src_incomes'].str.contains("labour")]
labor_sector = get_ocurrences(labour_set, 'labour_sector')
labor_sector_femHoHH = get_ocurrences(labour_set_femHoHH, 'labour_sector')
#create_pie_graph(labor_sector, "Labor sector")
create_graphs_HoHH(lambda x: get_ocurrences(x.loc[x['src_incomes'].str.contains("labour")], 'labour_sector'),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Labor sector")
labour_set.loc[labour_set['labour_participants'].str.contains('women')][['labour_participants', 'respondant_gender', 'respondant_age', 'respondant_head_hh', 'male_teenager', 'male_adult', 'male_elderly', 'labour_sector']]
labour_incomes = labour_set.groupby('labour_daily_salary').size()
mean_salary = labour_set['labour_daily_salary'].mean()
sectors_100 = labour_set.loc[labour_set['labour_daily_salary'] == 100].groupby('labour_sector').size()
print str(labour_set['deviceid'].count()) + " laborers"
create_graphs_HoHH(lambda x: x.loc[x['src_incomes'].str.contains("labour")].groupby('labour_work_term').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Temporality of laborers")
create_graphs_HoHH(lambda x: x.loc[x['src_incomes'].str.contains("labour")].groupby('labour_employment_type').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Employment Type")
create_graphs_HoHH(lambda x: x.loc[x['src_incomes'].str.contains("labour")].groupby('labour_participants').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Gender of laborers")

19 laborers


In [121]:
gov_salaries = labour_set.loc[labour_set['labour_sector'] == 'government']
agri_salaries = labour_set.loc[labour_set['labour_sector'] == 'agriculture']
trans_salaries = labour_set.loc[labour_set['labour_sector'] == 'transportation']
other_salaries = labour_set.loc[labour_set['labour_sector'] == 'other']
print mean_salary
gov_salaries_mean = gov_salaries['labour_daily_salary'].mean()
gov_salaries_num = gov_salaries.groupby('labour_daily_salary').size()
agri_salaries_mean = agri_salaries['labour_daily_salary'].mean()
agri_salaries_num = agri_salaries.groupby('labour_daily_salary').size()
trans_salaries_mean = trans_salaries['labour_daily_salary'].mean()
trans_salaries_num = trans_salaries.groupby('labour_daily_salary').size()
other_salaries_mean = other_salaries['labour_daily_salary'].mean()
other_salaries_num = other_salaries.groupby('labour_daily_salary').size()

salaries = {
    "layout": {
        "title": "Relation between the number of cattle and sheeps",
        "xaxis": {
            "title": "Number of goat"
        },
        "yaxis": {
            "title": "Number of cattle"
        }
    },
    "data": [
        {

            "x": prueba3.livestock_number_goat,
            "y": prueba3.livestock_number_cattle,
            "mode": "markers",
            "marker": {
                "size": prueba.number * 5               
            },
            "type": "scatter"
        }
    ]
}
iplot(goat_vs_cattle)

30.8333333333


### Agriculture

### Forestry (charcoal production)

In [122]:
forestry_set = reg_survey.loc[reg_survey['src_incomes'].str.contains("forestry")]
print forestry_set.groupby('forestry_forestry_type').size()
print get_ocurrences(forestry_set, 'forestry_forestry_type')
forestry_participants =  forestry_set.groupby('forestry_participants').size()
#forestry_set.loc[forestry_set['forestry_participants'].str.contains('women')][['forestry_participants', 'respondant_gender', 'respondant_age', 'respondant_head_hh', 'male_teenager', 'male_adult', 'male_elderly', 'labour_sector']]
forestry_land_property = forestry_set.groupby('forestry_land_property').size()

create_pie_graph(forestry_set.groupby('forestry_forestry_type').size(), "Forestry Type")
create_pie_graph(forestry_set.groupby('forestry_participants').size(), "Forestry participants")
create_pie_graph(forestry_set.groupby('forestry_land_property').size(), "Forests property")

print forestry_participants
print forestry_land_property

forestry_forestry_type
charcoal              3
charcoal fuelwood    95
fuelwood             28
fuelwood other        4
dtype: int64
charcoal     98
fuelwood    127
other         4
dtype: int64


forestry_participants
men             110
men children      9
men women        10
dtype: int64
forestry_land_property
community             60
none                  58
none own community     1
own                    4
own community          2
shared                 5
dtype: int64


In [123]:
def create_2pie_graph(serie1, serie2, title, title1, title2):
    fig = {
        "layout": {
            "title": title,
            "annotations": [
                {
                    "font": {
                        "size": 14
                    },
                    "showarrow": False,
                    "text": title1,
                    "x": .20,
                    "y": 1.1
                },
                {
                    "font": {
                        "size": 14
                    },
                    "showarrow": False,
                    #"text": title2 + " (" + str(sum_func(data_global)) + ")",
                    "text": title2,
                    "x": .8,
                    "y": 1.1 
                }
            ]
        },
        "data": [
            {
                "values": serie1.values,
                "labels": serie1.index.values,
                "type": "pie",
                "sort": False,
                "domain": {
                    "x": [.07, .43]
                },
            },
            {
                "values": serie2.values,
                "labels": serie2.index.values,
                "type": "pie",
                "sort": False,
                "domain": {
                    "x": [.57, .93]
                },
            }
        ]
    }

    iplot(fig)
    
create_2pie_graph(forestry_set.groupby('forestry_training').size(), livestock_set.groupby('livestock_training').size(), "Population who has received training in their livelihoods strategies", "Forestry", "Livestock")

### Other

In [124]:
other_set = reg_survey.loc[reg_survey['src_incomes'].str.contains("other")]
other_set.groupby('other_training').size()


other_training
no     37
yes     6
dtype: int64

### Coping strategies

In [125]:
cop_strats = get_ocurrences(reg_survey, 'coping_strat')
cop_strats = cop_strats * 100 / total_surveys

fig_cop_strats = {
    "layout": {
        "title": "Coping Strategies",
        "xaxis": {
            "tickangle": -45
        },
        "yaxis": {
            "ticksuffix": "%"
        },
        "width": 700,
        "annotations": []
    },
    "data": [
        {
            "x": map(lambda x: x.capitalize(), cop_strats.index.values),
            "y": cop_strats.values,
            "type": "bar",
            "name": "Livelihoods Strategies",
            "marker": {
                "color": 'rgb(205,205,258)',
                "line": {
                    "color": 'rgb(0,0,192)',
                    "width": 1.5
                }
            },
        }
    ]
}

iplot(fig_cop_strats)

## Climate change effects

In [126]:
cc_effects_on_rain = get_ocurrences(reg_survey, 'changed_rainy_season')

print cc_effects_on_rain

decreased         327
season_changed    176
unsteady          106
increased          53
no_changes         14
unpredictable       3
dtype: int64


In [127]:
sand_mov_sums = reg_survey.groupby('sand_movements').size()
wind_changes = get_ocurrences(reg_survey, 'sand_movements')

print wind_changes
print sand_mov_sums

create_graphs_respondent(lambda x: x.groupby('sand_movements').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Respondents asked about wind changes in las ten years")

create_pie_graph(wind_changes, "Respondents asked about wind changes in last ten years")

less_windy        59
more_windy       257
no_changes        45
unpredictable     27
dtype: int64
sand_movements
less_windy                   55
more_windy                  254
more_windy less_windy         2
no_changes                   45
unpredictable                24
unpredictable less_windy      2
unpredictable more_windy      1
dtype: int64


In [128]:
temps_changes = get_ocurrences(reg_survey, 'temps_change')

print temps_changes

decreased         53
unsteady          71
unpredictable      6
increased        309
no_changes         1
dtype: int64


In [129]:
effects_on_lvstck = get_ocurrences(reg_survey, 'cc_effects_grassland_availability').sort_values(ascending = False)
lvstck_perc = 100*effects_on_lvstck/livestock_set['deviceid'].size
lvstck_perc = lvstck_perc.round(2)
fig_effects_on_lvstck = {
    "layout": {
        "title": "Trend of the availability of grassland in the last 5 years",
        "xaxis": {
            "tickangle": -45
        },
        "width": 700,
        "annotations": []
    },
    "data": [
        {
            "x": map(lambda x: x.capitalize(), lvstck_perc.index.values),
            "y": lvstck_perc.values,
            "type": "bar",
            "marker": {
                "color": 'rgb(25,158,25,0.6)',
                "line": {
                    "color": 'rgb(0,70,0)',
                    "width": 1.5
                }
            },
        }
    ]
}


for i in range(0, len(effects_on_lvstck)):
    fig_effects_on_lvstck["layout"]["annotations"].append(
        {
            "x": map(lambda x: x.capitalize(), lvstck_perc.index.values)[i],
            "y": lvstck_perc.values[i] + 1.7,
            "text": str(lvstck_perc.values[i]) + "%",
            "font": dict(
                family='Arial',
                size=14,
                color='rgba(0, 100, 0, 1)'
            ),
            "showarrow": False,

        }
    )

iplot(fig_effects_on_lvstck)
