In [1]:
import pandas as pd
import numpy as np
import math

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
import plotly.figure_factory as ff

from IPython.display import display, HTML

init_notebook_mode(connected=True)

reg_survey_complete = pd.read_excel("Northern_baseline_survey_for_CC_project_in_Sudan_final_results.xlsx", encoding = 'utf_8')

# <center>Baseline survey results for the Locality of El Golid </center>
### <center>*Climate Change and Livelihoods situation of 15 villages located in El Golid Locality, Northern State*</center>

In [2]:
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')

#bc_wages.head()
#reg_csv.info()
#bc_wages.describe()

In [3]:
# Functions

# Get ocurrences of each value for a multiple_select field
def get_ocurrences(df, column_name):
    values_sums = df.groupby(column_name).size()
    values_names = reduce(lambda x, y: x + y,map(lambda x: x.split(), values_sums.index.values))
    result = pd.Series()
    for i in values_names:
        result[i] = values_sums.loc[[i in idx for idx in values_sums.index]].sum()
    return result

def get_percentiles_values(sorted_serie, percentiles):
    if len(percentiles) == 0:
        return []
    else:
        quantity = sorted_serie.sum() * percentiles[0]
        for i in sorted_serie.index:
            if sorted_serie[i] >= quantity:
                return [i] + get_percentiles_values(sorted_serie, percentiles[1:])
            else:
                quantity -= sorted_serie[i]

def get_percentiles(sorted_serie, values):
    if len(values) == 0:
        return []
    else:
        result = 0
        for i in sorted_serie.index:
            if values[0] == i:
                return [float(result + sorted_serie[i]) / sorted_serie.sum()] + get_percentiles(sorted_serie, values[1:])
            elif values[0] < i:
                return [float(result) / sorted_serie.sum()] + get_percentiles(sorted_serie, values[1:])
            else:
                result += sorted_serie[i]
        return [1]

In [4]:
## File preprocessing
#Deleting 'demography:', 'house_assets:', 'incomes:', 'climate_change:' and those prefix which not provide useful info.

col_names = reg_survey_complete.columns
col_names = col_names.map(lambda x: x[11:] if x.startswith("demography:") else x)
col_names = col_names.map(lambda x: x[19:] if x.startswith("responding_section:") else x)
col_names = col_names.map(lambda x: x[20:] if x.startswith("responding_section2:") else x)
col_names = col_names.map(lambda x: x[13:] if x.startswith("house_assets:") else x)
col_names = col_names.map(lambda x: x[8:] if x.startswith("incomes:") else x)
col_names = col_names.map(lambda x: x[15:] if x.startswith("climate_change:") else x)
col_names = col_names.map(lambda x: x[11:] if x.startswith("intro_data:") else x)
col_names = col_names.map(lambda x: x[:11] + x[16:] if x.startswith("respondant_page:") else x)
col_names = col_names.map(lambda x: x[16:] if x.startswith("general_hh_data:") else x)
col_names = col_names.map(lambda x: x[6:] if x.startswith("males:") else x)
col_names = col_names.map(lambda x: x[8:] if x.startswith("females:") else x)
col_names = col_names.map(lambda x: x[11:] if x.startswith("importance:") else x)
col_names = col_names.map(lambda x: x[19:] if x.startswith("percentage_incomes:") else x)
col_names = col_names.map(lambda x: x[:10] + x[16:] if x.startswith("agri_gard:agri") else x)
col_names = col_names.map(lambda x: x[:10] + x[20:] if x.startswith("livestock:livestock_") else x)
col_names = col_names.map(lambda x: x[:10] + x[21:] if x.startswith("livestock:livestock") else x)
col_names = col_names.map(lambda x: x[:8] + x[17:] if x.startswith("fishing:fishing") else x)
col_names = col_names.map(lambda x: x[:7] + x[15:] if x.startswith("labour:labour") else x)
col_names = col_names.map(lambda x: x[:7] + x[14:] if x.startswith("labour:labour") else x)
col_names = col_names.map(lambda x: x[:12] + x[25:] if x.startswith("handicrafts:handicrafts") else x)
col_names = col_names.map(lambda x: x[:9] + x[19:] if x.startswith("forestry:forestry") else x)
col_names = col_names.map(lambda x: x[:6] + x[13:] if x.startswith("other:other") else x)
col_names = col_names.map(lambda x: x[7:] if x.startswith("coping:") else x)
col_names = col_names.map(lambda x: x[4:] if x.startswith(("cc1:", "cc2:", "cc3:", "ews:")) else x)
col_names = col_names.map(lambda x: x[10:] if x.startswith("renewable:") else x)
col_names = col_names.map(lambda x: x[12:] if x.startswith("agriculture_cc_effects:") else x)
col_names = col_names.map(lambda x: x[10:] if x.startswith("livestock_cc_effects:") else x)

In [5]:
# Replace ':' with '_' for compatibility reasons.
col_names = col_names.map(lambda x: x[:14] + '_' + x[15:] if x.startswith("geocoordinates:") else x)
col_names = col_names.map(lambda x: x[:9] + '_' + x[10:] if x.startswith("agri_gard:") else x)
col_names = col_names.map(lambda x: x[:9] + '_' + x[10:] if x.startswith("livestock:") else x)
col_names = col_names.map(lambda x: x[:7] + '_' + x[8:] if x.startswith("fishing:") else x)
col_names = col_names.map(lambda x: x[:6] + '_' + x[7:] if x.startswith("labour:") else x)
col_names = col_names.map(lambda x: x[:11] + '_' + x[12:] if x.startswith("handicrafts:") else x)
col_names = col_names.map(lambda x: x[:8] + '_' + x[9:] if x.startswith("forestry:") else x)
col_names = col_names.map(lambda x: x[:5] + '_' + x[6:] if x.startswith("other:") else x)
col_names = col_names.map(lambda x: x[:10] + '_' + x[11:] if x.startswith("cc_effects:") else x)
col_names = col_names.map(lambda x: x[:4] + '_' + x[5:] if x.startswith("meta:") else x)
col_names = col_names.map(lambda x: x[:3] + '_' + x[4:] if x.startswith("cea:") else x)

col_names = col_names.map(lambda x: 'livestock_lost_goat' if x == 'livestock_lost_goats' else x)

print col_names.values

reg_survey_complete.columns = col_names

[u'deviceid' u'today' u'surveyor_name' u'number' u'village'
 u'geocoordinates_Latitude' u'geocoordinates_Longitude'
 u'geocoordinates_Altitude' u'geocoordinates_Accuracy' u'responding'
 u'respondant_gender' u'respondant_age' u'respondant_marital_status'
 u'respondant_head_hh' u'education' u'handicapped_member' u'number_wifes'
 u'number_wifes_aux' u'male_infant' u'male_child' u'male_child_school'
 u'male_teenager' u'male_teenager_school' u'male_adult' u'male_elderly'
 u'female_infant' u'female_child' u'female_child_school' u'female_teenager'
 u'female_teenager_school' u'female_adult' u'female_elderly'
 u'family_males' u'family_females' u'family_total' u'family_confirm'
 u'house_material' u'belongings' u'num_beds' u'kitchen_type' u'src_incomes'
 u'imp_agri' u'imp_garden' u'imp_livestock' u'imp_fishing' u'imp_labour'
 u'imp_forestry' u'imp_handcrafts' u'imp_other' u'perc_incomes_agri'
 u'perc_incomes_garden' u'perc_incomes_livestock' u'perc_incomes_fishing'
 u'perc_incomes_labour' u'perc_

In [6]:
# Processing NaN and void values

values = {
    'respondant_marital_status': 'no_answer',
    'handicapped_member': 'no',
    'house_material': 'no_answer',
    'kitchen_type': 'no_answer',
    'src_incomes': 'none',
    'use_ews': 'no_answer',
    'renewable_knowledge': 'no_answer',
    'solar_energy': 'no_answer',
    'livestock_selling': 'no_answer',
    'livestock_social': 'no_answer',
    'livestock_milk': 'no_answer',
    'livestock_meat': 'no_answer',
    'livestock_savings': 'no_answer',
    'livestock_number_goat': 0,
    'livestock_number_sheep': 0,
    'livestock_number_camel': 0,
    'livestock_number_cattle': 0,
    'livestock_number_chicken': 0,
    'labour_participants': 'no_answer',
    'labour_sector': '',
    'agri_gard_water_harv_farmlands': 0,
    'agri_gard_land_quantity': 0,
    'cc_effects_sand_affected_farmlands': 0
}
reg_survey_complete.fillna(value=values, inplace=True)

In [7]:
# Creating villages dict
villages = reg_survey_complete['village'].unique()
villages_table = pd.read_excel("villages_northern.xlsx")
# Villages dictionary
villages_dict = dict()
ar_dict = villages_table.set_index('survey_code').to_dict()['Arabic name']
villages_dict['ar'] = ar_dict
villages_dict['en'] = villages_table.set_index('survey_code').to_dict()['English transcription']
#print repr(villages_dict).decode('unicode_escape')
reg_survey_complete['village'] = reg_survey_complete.village.map(villages_dict['en'])
print villages_dict

{'ar': {1: u'\u0644\u062a\u064a \u0642\u0633\u0645 1', 2: u'\u0644\u062a\u064a \u0642\u0633\u0645 2', 3: u'\u0644\u062a\u064a \u0642\u0633\u0645 4', 4: u'\u0627\u0644\u0645\u0642\u0627\u0648\u062f\u0629', 5: u'\u0643\u062f\u0643\u0648\u0644', 6: u'\u0646\u0627\u0648\u0627 \u0634\u0645\u0627\u0644', 7: u'\u0646\u0627\u0648\u0627 \u062c\u0646\u0648\u0628', 8: u'\u0628\u0633\u0644\u0627\u0646 \u0634\u0645\u0627\u0644', 9: u'\u0628\u0633\u0644\u0627\u0646 \u062c\u0646\u0648\u0628', 10: u'\u0627\u0644\u0632\u0631\u0627\u0626\u0628', 11: u'\u0645\u0644\u0648\u0627\u062f', 12: u'\u0631\u0648\u0645\u064a \u0628\u062d\u0631\u0649', 13: u'\u0631\u0648\u0645\u064a \u0642\u0628\u0644\u064a', 14: u'\u0631\u0648\u0645\u064a \u0648\u0633\u0637', 15: u'\u0623\u0645 \u0643\u0631\u0627\u0628\u064a\u062c', 16: u'\u0643\u0646\u0643\u0644\u0627\u0628'}, 'en': {1: u'Laty. D. 1', 2: u'Laty. D. 2', 3: u'Laty. D. 4', 4: u'Almagawda', 5: u'Kadacol', 6: u'North Nawa', 7: u'South Nawa', 8: u'North Baslan', 9: u'S

In [8]:
# For answered surveys
reg_survey = reg_survey_complete.loc[reg_survey_complete['responding'].str.match('yes')].copy()

In [9]:
# Creating num of LH strategies
reg_survey['num_lh_strats'] = reg_survey['src_incomes'].map(lambda x: len(x.split()))

In [10]:
# Separate male and female
reg_survey_male = reg_survey.loc[reg_survey['respondant_gender'].str.match('male')].copy()
reg_survey_female = reg_survey.loc[reg_survey['respondant_gender'].str.match('female')].copy()

In [11]:
## Separate per HoHH gender

# Respondent is HH
reg_survey['married'] = reg_survey['respondant_marital_status'].str.match('married')
# Females HoHH
reg_survey_female_head = reg_survey[reg_survey['respondant_gender'].str.match('female') &
                                      reg_survey['respondant_head_hh'].str.match('yes')].copy()

# Females noHoHH
reg_survey_female_no_head = reg_survey[reg_survey['respondant_gender'].str.match('female') &
                                      reg_survey['respondant_head_hh'].str.match('no')].copy()

# Males HoHH + Female noHoHH married
reg_survey_male_head = reg_survey[reg_survey['respondant_gender'].str.match('male') &
                                      reg_survey['respondant_head_hh'].str.match('yes')].copy()
reg_survey_male_head = pd.concat([reg_survey_male_head,
                                  reg_survey_female_no_head[reg_survey_female_no_head['married']]])


# Add females nHoHH nM with male_adult to male_head
reg_survey_female_nHoHH_nM = reg_survey_female_no_head[~reg_survey_female_no_head['married']]
reg_survey_male_head = pd.concat([reg_survey_male_head,
                                  reg_survey_female_nHoHH_nM[reg_survey_female_nHoHH_nM['male_adult'] > 0]])

# Add females nHoHH nM with male_adult to female_head
reg_survey_female_head = pd.concat([reg_survey_female_head,
                                    reg_survey_female_nHoHH_nM[reg_survey_female_nHoHH_nM['male_adult'] == 0]])

# Add males nHoHH with male_adult to male_head
reg_survey_male_nHoHH = reg_survey[reg_survey['respondant_gender'].str.match('male') &
                                      reg_survey['respondant_head_hh'].str.match('no')].copy()
reg_survey_maleHoHH = pd.concat([reg_survey_male_head,
                                  reg_survey_male_nHoHH[reg_survey_male_nHoHH['male_adult'] > 0]])

# Add females nHoHH with male_adult to female_head
reg_survey_femHoHH = pd.concat([reg_survey_female_head,
                                  reg_survey_male_nHoHH[reg_survey_male_nHoHH['male_adult'] == 0]])

HoHH = {}
HoHH['Male HoHH'] = reg_survey_maleHoHH['deviceid'].count()
HoHH['Female HoHH'] = reg_survey_femHoHH['deviceid'].count()

HoHH = pd.Series(HoHH)

fig = {
    "layout": {
        "title": "Households according to the gender of the HoHH"
    },
    "data": [
        {
            "values": HoHH.values,
            "labels": HoHH.index.values,
            "type": "pie",
            "sort": False
        }
    ]
}

iplot(fig)

In [12]:
reg_survey_hand_yes = reg_survey.loc[reg_survey.handicapped_member == 'yes'].copy()
reg_survey_hand_no = reg_survey.loc[reg_survey.handicapped_member == 'no'].copy()
print reg_survey['deviceid'].count()
yes_no_number = [reg_survey_hand_yes['deviceid'].count(), reg_survey_hand_no['deviceid'].count()]
yes_no_values = ['yes', 'no']

fig = {
    "layout": {
        "title": "Households with handicapped members"
    },
    "data": [
        {
            "values": yes_no_number,
            "labels": yes_no_values,
            "type": "pie",
            "sort": False
        }
    ]
}

iplot(fig)

524


In [13]:
def create_graphs_HoHH(data_func, values_func, labels_func, sum_func, title):
    create_three_pie_graphs((reg_survey_femHoHH, reg_survey, reg_survey_maleHoHH), data_func, values_func,
                            labels_func, sum_func, title,
                            "Female Head of Household", .04,
                            "Global", .5,
                            "Male Head of Household", .94)

def create_graphs_respondent(data_func, values_func, labels_func, sum_func, title):
    create_three_pie_graphs((reg_survey_female, reg_survey, reg_survey_male), data_func, values_func,
                            labels_func, sum_func, title,
                            "Female Respondent", .07,
                            "Global", .5,
                            "Male Respondent", .91)

def create_pie_graph(serie, title):
    fig = {
        "layout": {
            "title": title
        },
        "data": [
            {
                "values": serie.values,
                "labels": serie.index.values,
                "type": "pie",
                "sort": False
            }
        ]
    }

    iplot(fig)
    
    
def create_three_pie_graphs((df1, df2, df3), data_func, values_func,
                            labels_func, sum_func, title,
                            title1, title1_x_offset,
                            title2, title2_x_offset,
                            title3, title3_x_offset):
    data_fem = data_func(df1)
    data_global = data_func(df2)
    data_male = data_func(df3)

    fig = {
        "layout": {
            "title": title,
            "annotations": [
                {
                    "font": {
                        "size": 14
                    },
                    "showarrow": False,
                    #"text": title1 + " (" + str(sum_func(data_fem)) + ")",
                    "text": title1,
                    "x": title1_x_offset,
                    "y": 1
                },
                {
                    "font": {
                        "size": 14
                    },
                    "showarrow": False,
                    #"text": title2 + " (" + str(sum_func(data_global)) + ")",
                    "text": title2,
                    "x": title2_x_offset,
                    "y": 1
                },
                {
                    "font": {
                        "size": 14
                    },
                    "showarrow": False,
                    #"text": title3 + " (" + str(sum_func(data_male)) + ")",
                    "text": title3,
                    "x": title3_x_offset,
                    "y": 1
                }
            ]
        },
        "data": [
            {
                "values": values_func(data_fem),
                "labels": labels_func(data_fem),
                "domain": {
                    "x": [0, .32]
                },
                "type": "pie",
                "sort": False
            },
            {
                "values": values_func(data_global),
                "labels": labels_func(data_global),
                "domain": {
                    "x": [.34, .64]
                },
                "type": "pie",
                "sort": False
            },
            {
                "values": values_func(data_male),
                "labels": labels_func(data_male),
                "domain": {
                    "x": [.66, .98]
                },
                "type": "pie",
                "sort": False
            }
        ]
    }

    iplot(fig)

def create_stacked_bars_graph((df1, df2), data_func, x_func, y_func, title,
                              title_df1, title_df2):
    data_global = data_func(pd.concat([df1, df2]))
    data_fem = data_func(df1)
    data_male = data_func(df2)

    fig = {
        "layout": {
            "title": title,
            "xaxis": {
                "tickangle": -45
            },
            "barmode": "stack",
            "width": 700,
            "annotations": []
        },
        "data": [
            {
                "x": x_func(data_fem),
                "y": y_func(data_fem),
                "type": "bar",
                "name": title_df1,
                "marker": {
                    "color": 'rgb(225,225,258)',
                    "line": {
                        "color": 'rgb(0,0,192)',
                        "width": 1.5
                    }
                },
            },
            {
                "x": x_func(data_male),
                "y": y_func(data_male),
                "type": "bar",
                "name": title_df2,
                "marker": {
                    "color": 'rgb(258,225,225)',
                    "line": {
                        "color": 'rgb(192,0,0)',
                        "width": 1.5
                    }
                },
            }
        
        ]
    }


#    for i in range(0, len(kitchen_type)):
#        fig["layout"]["annotations"].append(
#            {
#                 "x": map(lambda x: x.capitalize(), kitchen_perc.index.values)[i],
#                 "y": kitchen_perc.values[i] + 1.5,
#                "x": pd.concat([df1, df2])[i],
#                "y": y_func(data_global)[i] + 1.5,
#                "text": str(data_global.values[i]) + "%",
#                "font": dict(
#                    family='Arial',
#                    size=14,
#                    color='rgba(0, 0, 100, 1)'
#                ),
#                "showarrow": False,
#    
#            }
#        )
#
#
    iplot(fig)
    

def plot_wealth_criteria(groupby_data, values, title):
    groupby_data = groupby_data * 100 / groupby_data.sum()
    poor = groupby_data[groupby_data.index <= values[0]]
    medium = groupby_data[groupby_data.index > values[0]]
    medium = medium[medium.index <= values[1]]
    rich = groupby_data[groupby_data.index > values[1]]
    rich = rich[rich.index <= values[2]]
    print medium
    
    fig_distribution = {
        "layout": {
            "title": title,
            "yaxis": {
                "ticksuffix": "%"
            },
            "width": 700,
            "annotations": []
        },
        "data": [
            {
                "x": poor.index.values,
                "y": poor.values,
                "type": "bar",
                "name": "Very poor (" + str(round(poor.values.sum(), 1)) + "%)" ,
                "marker": {
                    "color": 'rgb(214,39,40)',
                }
            },
            {
                "x": medium.index.values,
                "y": medium.values,
                "type": "bar",
                "name": "Poor (" + str(round(medium.values.sum(), 1)) + "%)",
                "marker": {
                    "color": 'rgb(255,127,14)',
                }
            },
            {
                "x": rich.index.values,
                "y": rich.values,
                "type": "bar",
                "name": "Medium (" + str(round(rich.values.sum(), 1)) + "%)",
                "marker": {
                    "color": 'rgb(44,160,44)',
                }
            }
        ]
    }
    
    iplot(fig_distribution)


## General data

Number of Surveys:

In [14]:
print "Total:" + str(reg_survey_complete['deviceid'].count())
print ""
total_surveys = reg_survey['deviceid'].count()
print "Responding:" + str(total_surveys)
print ""

Total:525

Responding:524



### Respondents per gender and village

In [15]:
surveys_village = reg_survey.groupby('village').size()
surveys_village_male = reg_survey_male.groupby('village').size()
surveys_village_female = reg_survey_female.groupby('village').size()
respondants_gender = pd.DataFrame(surveys_village, columns= ['Total'])
respondants_gender['Male'] = surveys_village_male
respondants_gender['Male %'] = (surveys_village_male*100 / surveys_village).round(2)
respondants_gender['Female'] = surveys_village_female
respondants_gender['Female %'] = (surveys_village_female*100 / surveys_village).round(2)
respondants_gender = respondants_gender.reindex(['Male', 'Male %', 'Female', 'Female %', 'Total'], axis=1)
respondants_gender.loc['Total', 'Total'] = respondants_gender['Total'].sum()
respondants_gender.loc['Total', 'Male'] = respondants_gender['Male'].sum()
respondants_gender.loc['Total', 'Male %'] = round(100*respondants_gender.loc['Total', 'Male'] / respondants_gender.loc['Total', 'Total'], 2)
respondants_gender.loc['Total', 'Female'] = respondants_gender['Female'].sum()
respondants_gender.loc['Total', 'Female %'] = round(100*respondants_gender.loc['Total', 'Female'] / respondants_gender.loc['Total', 'Total'], 2)
respondants_gender['Male'] = respondants_gender['Male'].astype(int)
respondants_gender['Female'] = respondants_gender['Female'].astype(int)
respondants_gender['Total'] = respondants_gender['Total'].astype(int)

display(respondants_gender)

Unnamed: 0_level_0,Male,Male %,Female,Female %,Total
village,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Almagawda,13,31.71,28,68.29,41
Alzaraib,11,20.75,42,79.25,53
Kadacol,14,33.33,28,66.67,42
Kankalab,10,26.32,28,73.68,38
Laty. D. 1,18,54.55,15,45.45,33
Laty. D. 2,4,30.77,9,69.23,13
Laty. D. 4,12,35.29,22,64.71,34
Medium Romy,3,20.0,12,80.0,15
Molwad,26,56.52,20,43.48,46
North Baslan,11,44.0,14,56.0,25


In [16]:
surveys_village_maleHoHH = reg_survey_maleHoHH.groupby('village').size()
surveys_village_femHoHH = reg_survey_femHoHH.groupby('village').size()
HoHH_gender = pd.DataFrame(surveys_village, columns= ['Total'])
HoHH_gender['Male'] = surveys_village_maleHoHH
HoHH_gender['Male %'] = (surveys_village_maleHoHH*100 / surveys_village).round(2)
HoHH_gender['Female'] = surveys_village_femHoHH
HoHH_gender['Female %'] = (surveys_village_femHoHH*100 / surveys_village).round(2)
HoHH_gender = HoHH_gender.reindex(['Male', 'Male %', 'Female', 'Female %', 'Total'], axis=1)
HoHH_gender.loc['Total', 'Total'] = HoHH_gender['Total'].sum()
HoHH_gender.loc['Total', 'Male'] = HoHH_gender['Male'].sum()
HoHH_gender.loc['Total', 'Male %'] = round(100*HoHH_gender.loc['Total', 'Male'] / HoHH_gender.loc['Total', 'Total'], 2)
HoHH_gender.loc['Total', 'Female'] = HoHH_gender['Female'].sum()
HoHH_gender.loc['Total', 'Female %'] = round(100*HoHH_gender.loc['Total', 'Female'] / HoHH_gender.loc['Total', 'Total'], 2)
HoHH_gender['Male'] = HoHH_gender['Male'].astype(int)
HoHH_gender['Female'] = HoHH_gender['Female'].astype(int)
HoHH_gender['Total'] = HoHH_gender['Total'].astype(int)

display(HoHH_gender)

Unnamed: 0_level_0,Male,Male %,Female,Female %,Total
village,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Almagawda,26,63.41,15,36.59,41
Alzaraib,33,62.26,20,37.74,53
Kadacol,24,57.14,18,42.86,42
Kankalab,25,65.79,13,34.21,38
Laty. D. 1,23,69.7,10,30.3,33
Laty. D. 2,8,61.54,5,38.46,13
Laty. D. 4,20,58.82,14,41.18,34
Medium Romy,8,53.33,7,46.67,15
Molwad,31,67.39,15,32.61,46
North Baslan,19,76.0,6,24.0,25


### HHs Composition

In [17]:
print reg_survey_femHoHH['deviceid'].count()
print reg_survey_maleHoHH['deviceid'].count()

187
337


In [18]:
create_graphs_HoHH(lambda x: x.groupby('education').size(),
                 lambda x: x.values,
                 lambda x: x.index.values,
                   lambda x: x.sum(),
                 "Education level of the head of household")


In [19]:
#hey = reg_survey.groupby(['village', 'house_material']).size()

#house_materials_df = pd.DataFrame(index=house_materials_index, columns=villages, data=[house_materials_general])
#print house_materials_df

create_graphs_HoHH(lambda x: pd.crosstab(x.village, x.house_material),
                 lambda x: x.sum().values,
                 lambda x: x.columns.values,
                   lambda x: x.sum().sum(),
                 "House materials")

create_pie_graph(reg_survey.groupby('house_material').size(), "House materials")

fig_house_materials2 = {
    "layout": {
        "title": {
            "font": {
                    "size": 20
            },
            "text": "House materials"
        }
    },
    "data": []
}

house_materials_df = pd.crosstab(reg_survey.village, reg_survey.house_material)
#print range(len(villages_dict['en'].keys())-1)
#print house_materials_df
for i in range(len(villages_dict['en'].keys())):
    x = .2*(i%5)
    y = 0.35*(i/5)
    fig_house_materials2['data'].append(
        {

            "values": house_materials_df.iloc[i-1].values,
            "labels": house_materials_df.columns.values,
             "domain": {
                "x": [x, x+.18],
                 "y": [y, y+.3]
            },
            "type": "pie",
            "sort": False
        })

iplot(fig_house_materials2)

###  Mean Household size

In [20]:
mean_hh_size = reg_survey.family_total.mean()
print str(mean_hh_size) + " persons per household"

5.67938931298 persons per household


## Climate change awareness

In [21]:
create_graphs_respondent(lambda x: x.groupby('cc_knowledge').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Households declaring CC knowledge")

In [22]:
create_graphs_respondent(lambda x: x.groupby('taking_action_cc').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Households declaring taking actions against CC")

#### EWS awareness

In [23]:
create_graphs_respondent(lambda x: x.groupby('ews_knowledge').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Households declaring EWS knowledge")

In [24]:
create_graphs_respondent(lambda x: x.groupby('use_ews').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Households using EWS information")

#### Renewable energy awareness

In [25]:
create_graphs_respondent(lambda x: x.groupby('renewable_knowledge').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Households declaring renewable energies knowledge")

In [26]:
create_graphs_respondent(lambda x: x.groupby('solar_energy').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Households using solar energies")

#### Cooking facilities

In [27]:
kitchen_type = reg_survey.groupby('kitchen_type').size()
kitchen_type_male = get_ocurrences(reg_survey_maleHoHH, 'kitchen_type').sort_values(ascending = False)
kitchen_type_female = get_ocurrences(reg_survey_femHoHH, 'kitchen_type').sort_values(ascending = False)
kitchen = pd.crosstab(reg_survey.village, reg_survey.kitchen_type)
#display(kitchen)

kitchen_perc_male = 100*kitchen_type_male/total_surveys
kitchen_perc_female = 100*kitchen_type_female/total_surveys
kitchen_perc = 100*kitchen_type/total_surveys
kitchen_perc = kitchen_perc.round(2)
#print kitchen_perc

create_stacked_bars_graph((reg_survey_femHoHH, reg_survey_maleHoHH),
                              lambda x: 100 * get_ocurrences(x, 'kitchen_type').sort_values(ascending = False) / total_surveys,
                              lambda x: map(lambda y: y.capitalize(), x.index.values),
                              lambda x: x.values,
                              "Cooking techniques",
                              "Female Head of Household",
                              "Male Head of Household")

## Livelihoods

Number of households practicing each livelihood

In [28]:
#reg_survey.loc[reg_survey['src_incomes'].str.contains("agriculture"), 'agriculture'] = 'yes'
#reg_survey.loc[reg_survey['src_incomes'].str.contains("livestock"), 'livestock'] = 'yes'
#reg_survey.loc[reg_survey['src_incomes'].str.contains("labour"), 'labour'] = 'yes'
#importance = reg_survey.groupby(['agriculture', 'livestock', 'labour'])['perc_incomes_agri'].agg('mean')

#agric = reg_survey[reg_survey['src_incomes'].str.contains("agriculture")]['deviceid'].count()

src_incomes_sums = reg_survey.groupby('src_incomes').size()
lh_values = get_ocurrences(reg_survey, 'src_incomes')
lh_types = pd.DataFrame({"Households": lh_values})
#lh_types["Percentage"] = lh_types.Households.map(lambda x: str(round(100*x/total_surveys))) + " %"

print ""
print lh_values.sort_values(ascending = False)


agriculture    212
labour         180
other          102
livestock       62
handicrafts     13
garden          12
fishing          7
none             5
forestry         1
dtype: int64


In [29]:
lh_perc = 100*lh_values/total_surveys
lh_perc = lh_perc.round(2)

create_stacked_bars_graph((reg_survey_femHoHH, reg_survey_maleHoHH),
                              lambda x: 100 * get_ocurrences(x, 'src_incomes') / total_surveys,
                              lambda x: map(lambda y: y.capitalize(), x.index.values),
                              lambda x: x.values,
                              "Livelihoods Strategies",
                              "Female Head of Household",
                              "Male Head of Household")

create_stacked_bars_graph((reg_survey_female, reg_survey_male),
                              lambda x: 100 * get_ocurrences(x, 'src_incomes') / total_surveys,
                              lambda x: map(lambda y: y.capitalize(), x.index.values),
                              lambda x: x.values,
                              "Livelihoods Strategies",
                              "Female respondent",
                              "Male respondent")

livelihoods_strategies = {
    "layout": {
        "title": "Livelihoods Strategies",
        "xaxis": {
            "tickangle": -45
        },
        "width": 700,
        "annotations": []
    },
    "data": [
        {
            "x": map(lambda x: x.capitalize(), lh_perc.index.values),
            "y": lh_perc.values,
            "type": "bar",
            "name": "Livelihoods Strategies",
            "marker": {
                "color": 'rgb(258,225,225)',
                "line": {
                    "color": 'rgb(192,0,0)',
                    "width": 1.5
                }
            },
        }
    ]
}


for i in range(0, len(lh_values)):
    livelihoods_strategies["layout"]["annotations"].append(
        {
            "x": map(lambda x: x.capitalize(), lh_perc.index.values)[i],
            "y": lh_perc.values[i] + 1.5,
            "text": str(lh_perc.values[i]) + "%",
            "font": dict(
                family='Arial',
                size=14,
                color='rgba(100, 0, 0, 1)'
            ),
            "showarrow": False,

        }
    )

iplot(livelihoods_strategies)

In [30]:

lh_values_hand = get_ocurrences(reg_survey_hand_yes, 'src_incomes')
lh_perc_hand = 100*lh_values_hand/reg_survey_hand_yes['deviceid'].count()
lh_perc_hand = lh_perc_hand.round(2)

livelihoods_strategies_hand = {
    "layout": {
        "title": "Livelihoods Strategies in HH with dependent members",
        "xaxis": {
            "tickangle": -45
        },
        "width": 700,
        "annotations": []
    },
    "data": [
        {
            "x": map(lambda x: x.capitalize(), lh_perc_hand.index.values),
            "y": lh_perc_hand.values,
            "type": "bar",
            "name": "Livelihoods Strategies",
            "marker": {
                "color": 'rgb(258,225,225)',
                "line": {
                    "color": 'rgb(192,0,0)',
                    "width": 1.5
                }
            },
        }
    ]
}


for i in range(0, len(lh_values_hand)):
    livelihoods_strategies_hand["layout"]["annotations"].append(
        {
            "x": map(lambda x: x.capitalize(), lh_perc_hand.index.values)[i],
            "y": lh_perc_hand.values[i] + 1.2,
            "text": str(lh_perc_hand.values[i]) + "%",
            "font": dict(
                family='Arial',
                size=14,
                color='rgba(100, 0, 0, 1)'
            ),
            "showarrow": False,

        }
    )

iplot(livelihoods_strategies_hand)

Main livelihoods per household

In [31]:
print src_incomes_sums.sort_values(ascending = False)
reg_survey['num_lh_strats'] = reg_survey['src_incomes'].map(lambda x: len(x.split()))
create_graphs_HoHH(lambda x: x.groupby('num_lh_strats').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Number of livelihoods Strategies")

src_incomes
labour                            171
agriculture                       164
other                              95
agriculture livestock              34
handicrafts                        10
livestock                           9
livestock other                     7
agriculture garden livestock        6
fishing                             5
none                                5
agriculture labour                  4
livestock labour                    3
agriculture garden                  3
garden                              2
agriculture livestock forestry      1
fishing handicrafts                 1
livestock handicrafts               1
garden livestock                    1
labour handicrafts                  1
fishing labour                      1
dtype: int64


In [32]:
reg_survey_hand_yes['num_lh_strats'] = reg_survey_hand_yes['src_incomes'].map(lambda x: len(x.split()))
create_pie_graph(reg_survey_hand_yes.groupby('num_lh_strats').size(), "Number livelihoods strategies in HH with dependent members")

- 277 out of 399 hh, i.e. 69.42%, lives only on livestock, forestry or both.

### Livestock

Number of interviewed households owning each type of livestock

In [33]:
livestock_set = reg_survey.loc[reg_survey['src_incomes'].str.contains("livestock")]

participants = livestock_set.groupby('livestock_participants').size()
participants_goats = livestock_set.loc[livestock_set['livestock_type'].str.contains("goat")].groupby('livestock_participants').size()
#print participants_goats

lvstk_types = ['camel', 'cattle', 'goat', 'sheep', 'chicken']

types_lvstck = get_ocurrences(livestock_set, 'livestock_type')
livestock_women = livestock_set.loc[livestock_set['livestock_participants'] == "women"]
type_women =  livestock_women.groupby('livestock_type').size()
#livestock_set = livestock_set.copy()
for i in lvstk_types:
    livestock_set[i + '_reduced'] = livestock_set['livestock_lost_' + i] / (livestock_set['livestock_number_' + i] + livestock_set['livestock_number_' + i])
#print ("Types for women")
#print(type_women)
#print livestock_set.groupby('camel_reduced').size()
goats = livestock_set['livestock_number_goat']

cattle = livestock_set['livestock_number_cattle'].dropna()
sheeps = livestock_set['livestock_number_sheep'].dropna()
camels = livestock_set['livestock_number_camel'].dropna()
chickens = livestock_set['livestock_number_chicken'].dropna()

livestock_set.groupby('livestock_training').size()

livestock_training
no     59
yes     1
dtype: int64

In [34]:
perc_lvstck = 100*types_lvstck/livestock_set['deviceid'].count()
perc_lvstck = perc_lvstck.round(2)
fig_types_lvstck = {
    "layout": {
        "title": "Livestock types",
        "xaxis": {
            "tickangle": -45
        },
        "width": 700,
        "annotations": []
    },
    "data": [
        {
            "x": map(lambda x: x.capitalize(), perc_lvstck.index.values),
            "y": perc_lvstck.values,
            "type": "bar",
            "name": "Livelihoods Strategies",
            "marker": {
                "color": 'rgb(258,225,225)',
                "line": {
                    "color": 'rgb(192,0,0)',
                    "width": 1.5
                }
            }
        }
    ]
}


for i in range(0, len(types_lvstck)):
    fig_types_lvstck["layout"]["annotations"].append(
        {
            "x": map(lambda x: x.capitalize(), perc_lvstck.index.values)[i],
            "y": perc_lvstck.values[i] + 1.5,
            "text": str(perc_lvstck.values[i]) + "%",
            "font": dict(
                family='Arial',
                size=14,
                color='rgba(100, 0, 0, 1)'
            ),
            "showarrow": False,

        }
    )

iplot(fig_types_lvstck)

In [35]:
prueba = pd.crosstab(livestock_set.livestock_number_goat, livestock_set.livestock_number_sheep).stack().reset_index(name='number')
#prueba.plot.scatter('livestock_number_goats', 'livestock_number_sheep', s=prueba.number * 10)
prueba = prueba[prueba.number != 0]
prueba = prueba[prueba.livestock_number_goat <= 100]
prueba = prueba[prueba.livestock_number_sheep <= 100]
prueba = prueba.drop(0)
#display(prueba)
goats_vs_sheeps = {
    "layout": {
        "title": "Relation between the number of goats and sheeps",
        "xaxis": {
            "title": "Number of goats"
        },
        "yaxis": {
            "title": "Number of sheeps"
        }
    },
    "data": [
        {

            "x": prueba.livestock_number_goat,
            "y": prueba.livestock_number_sheep,
            "mode": "markers",
            "marker": {
                "size": prueba.number * 5               
            },
            "type": "scatter"
        }
    ]
}
iplot(goats_vs_sheeps)

In [36]:
prueba2 = pd.crosstab(livestock_set.livestock_number_cattle, livestock_set.livestock_number_sheep).stack().reset_index(name='number')
#prueba.plot.scatter('livestock_number_goats', 'livestock_number_sheep', s=prueba.number * 10)
prueba2 = prueba2[prueba2.number != 0]
prueba2= prueba2.drop(0)
prueba2 = prueba2[prueba2.livestock_number_cattle <= 100]
prueba2 = prueba2[prueba2.livestock_number_sheep <= 100]
#display(prueba)
cattle_vs_sheeps = {
    "layout": {
        "title": "Relation between the number of cattle and sheeps",
        "xaxis": {
            "title": "Number of cattle"
        },
        "yaxis": {
            "title": "Number of sheeps"
        }
    },
    "data": [
        {

            "x": prueba2.livestock_number_cattle,
            "y": prueba2.livestock_number_sheep,
            "mode": "markers",
            "marker": {
                "size": prueba.number * 5               
            },
            "type": "scatter"
        }
    ]
}
iplot(cattle_vs_sheeps)

In [37]:
prueba3 = pd.crosstab(livestock_set.livestock_number_goat, livestock_set.livestock_number_cattle).stack().reset_index(name='number')
#prueba.plot.scatter('livestock_number_goats', 'livestock_number_sheep', s=prueba.number * 10)
prueba3 = prueba3[prueba3.number != 0]
prueba3= prueba3.drop(0)
prueba3 = prueba3[prueba3.livestock_number_goat <= 100]
prueba3 = prueba3[prueba3.livestock_number_cattle <= 100]
#display(prueba)
goat_vs_cattle = {
    "layout": {
        "title": "Relation between the number of cattle and sheeps",
        "xaxis": {
            "title": "Number of goat"
        },
        "yaxis": {
            "title": "Number of cattle"
        }
    },
    "data": [
        {

            "x": prueba3.livestock_number_goat,
            "y": prueba3.livestock_number_cattle,
            "mode": "markers",
            "marker": {
                "size": prueba.number * 5               
            },
            "type": "scatter"
        }
    ]
}
iplot(goat_vs_cattle)

With a few exceptions, the number of goats per household is proportional to the number of sheeps. So to define wealth groups for livestock tenants the number of goats will be the main variable.

In [38]:
quantiles = [.35, .97, 1]
q = goats.quantile(quantiles)
print "Quantiles:"
print q
goats1 = goats[goats < q[quantiles[0]]].dropna()
goats2 = goats[goats.between(q[quantiles[0]], q[quantiles[1]])].dropna()
goats3 = goats[goats.between(q[quantiles[1]], q[quantiles[2]])].dropna()
fig = ff.create_distplot([goats1.values, goats2.values, goats3.values], ['poor', 'medium', 'rich'])
iplot (fig)

Quantiles:
0.35     1.35
0.97    10.17
1.00    23.00
Name: livestock_number_goat, dtype: float64


#### Quantiles for goats
- Poors: 35% of households have less than 10 goats and less than 10 sheeps
- Medium: 62% of households have between 10 and 35 goats and between 20 and 30 sheeps
- Richs: 3% of households have more than 35 goats and/or more than 30 sheeps

In [39]:
livestock_uses = ["selling", "social", "milk", "meat", "savings"]
goats_uses = ["selling", "social", "milk", "meat", "savings"]
sheeps_uses = ["selling", "social", "milk", "meat", "savings"]
livestock_uses_df = livestock_set[map(lambda x: "livestock_" + x, livestock_uses)]
livestock_uses_df.fillna('no')
print "USES TOTAL LIVESTOCK"
print livestock_uses_df.apply(pd.Series.value_counts)
goats_uses = livestock_set.loc[livestock_set['livestock_type'].str.contains("goat")][map(lambda x: "livestock_" + x, goats_uses)]
print ""
print "USES TOTAL GOATS"
print goats_uses.apply(pd.Series.value_counts)
sheeps_uses = livestock_set.loc[livestock_set['livestock_type'].str.contains("sheep")][map(lambda x: "livestock_" + x, sheeps_uses)]
print ""
print "USES TOTAL SHEEPS"
print sheeps_uses.apply(pd.Series.value_counts)

USES TOTAL LIVESTOCK
     livestock_selling  livestock_social  livestock_milk  livestock_meat  \
no                  35                38               3               4   
yes                 27                24              59              58   

     livestock_savings  
no                  39  
yes                 23  

USES TOTAL GOATS
     livestock_selling  livestock_social  livestock_milk  livestock_meat  \
no                  23                26               1               3   
yes                 22                19              44              42   

     livestock_savings  
no                  27  
yes                 18  

USES TOTAL SHEEPS
     livestock_selling  livestock_social  livestock_milk  livestock_meat  \
no                  25                28               1               2   
yes                 23                20              47              46   

     livestock_savings  
no                  28  
yes                 20  


### Laborers

In [40]:
labour_set = reg_survey.loc[reg_survey['src_incomes'].str.contains("labour")]
labour_set_femHoHH = reg_survey_femHoHH.loc[reg_survey_femHoHH['src_incomes'].str.contains("labour")]
labor_sector = get_ocurrences(labour_set, 'labour_sector')
labor_sector_femHoHH = get_ocurrences(labour_set_femHoHH, 'labour_sector')
#create_pie_graph(labor_sector, "Labor sector")
create_graphs_HoHH(lambda x: get_ocurrences(x.loc[x['src_incomes'].str.contains("labour")], 'labour_sector'),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Labor sector")
labour_set.loc[labour_set['labour_participants'].str.contains('women')][['labour_participants', 'respondant_gender', 'respondant_age', 'respondant_head_hh', 'male_teenager', 'male_adult', 'male_elderly', 'labour_sector']]
labour_incomes = labour_set.groupby('labour_daily_salary').size()
mean_salary = labour_set['labour_daily_salary'].mean()
sectors_100 = labour_set.loc[labour_set['labour_daily_salary'] == 100].groupby('labour_sector').size()
print str(labour_set['deviceid'].count()) + " laborers"
create_graphs_HoHH(lambda x: x.loc[x['src_incomes'].str.contains("labour")].groupby('labour_work_term').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Temporality of laborers")
create_graphs_HoHH(lambda x: x.loc[x['src_incomes'].str.contains("labour")].groupby('labour_employment_type').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Employment Type")
create_graphs_HoHH(lambda x: x.loc[x['src_incomes'].str.contains("labour")].groupby('labour_participants').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Gender of laborers")

180 laborers


In [41]:
gov_salaries = labour_set.loc[labour_set['labour_sector'] == 'government']
agri_salaries = labour_set.loc[labour_set['labour_sector'] == 'agriculture']
trans_salaries = labour_set.loc[labour_set['labour_sector'] == 'transportation']
other_salaries = labour_set.loc[labour_set['labour_sector'] == 'other']
print mean_salary
gov_salaries_mean = gov_salaries['labour_daily_salary'].mean()
gov_salaries_num = gov_salaries.groupby('labour_daily_salary').size()
agri_salaries_mean = agri_salaries['labour_daily_salary'].mean()
agri_salaries_num = agri_salaries.groupby('labour_daily_salary').size()
trans_salaries_mean = trans_salaries['labour_daily_salary'].mean()
trans_salaries_num = trans_salaries.groupby('labour_daily_salary').size()
other_salaries_mean = other_salaries['labour_daily_salary'].mean()
other_salaries_num = other_salaries.groupby('labour_daily_salary').size()

salaries = {
    "layout": {
        "title": "Relation between the number of cattle and sheeps",
        "xaxis": {
            "title": "Number of goat"
        },
        "yaxis": {
            "title": "Number of cattle"
        }
    },
    "data": [
        {

            "x": prueba3.livestock_number_goat,
            "y": prueba3.livestock_number_cattle,
            "mode": "markers",
            "marker": {
                "size": prueba.number * 5               
            },
            "type": "scatter"
        }
    ]
}
iplot(goat_vs_cattle)

71.8044692737


### Agriculture

In [42]:
agri_set = reg_survey.loc[reg_survey['src_incomes'].str.contains("agriculture")].copy()
agri_set_female = reg_survey_female.loc[reg_survey_female['src_incomes'].str.contains("agriculture")].copy()
agri_set_femHoHH = reg_survey_femHoHH.loc[reg_survey_femHoHH['src_incomes'].str.contains("agriculture")].copy()
agri_set_hand_yes = reg_survey_hand_yes.loc[reg_survey_hand_yes['src_incomes'].str.contains("agriculture")].copy()

land_quantity = agri_set.groupby('agri_gard_land_quantity').size()
land_quantity_female = agri_set_female.groupby('agri_gard_land_quantity').size()
land_quantity_femHoHH = agri_set_femHoHH.groupby('agri_gard_land_quantity').size()
land_quantity_hand_yes = agri_set_hand_yes.groupby('agri_gard_land_quantity').size()
use_quantity = agri_set.groupby('agri_gard_use_quantity').size()

d = {'land': land_quantity, 'use': use_quantity}
use_land = pd.DataFrame(d)
hey = use_quantity.index.map(lambda x: str(x))
use_quantity.index = hey
print use_quantity.index
display(use_quantity)
display(use_land)
display(land_quantity)

Index([u'0.0', u'1.0', u'2.0', u'3.0', u'4.0', u'5.0', u'6.0', u'8.0', u'10.0',
       u'30.0', u'400.0', u'2100.0'],
      dtype='object', name=u'agri_gard_use_quantity')


agri_gard_use_quantity
0.0       11
1.0       60
2.0       63
3.0       18
4.0       10
5.0        9
6.0        6
8.0        1
10.0       6
30.0       1
400.0      1
2100.0     3
dtype: int64

Unnamed: 0,land,use
0.0,37.0,11.0
1.0,52.0,60.0
2.0,54.0,63.0
3.0,22.0,18.0
4.0,12.0,10.0
5.0,14.0,9.0
6.0,6.0,6.0
7.0,1.0,
8.0,1.0,1.0
10.0,8.0,6.0


agri_gard_land_quantity
0.0       37
1.0       52
2.0       54
3.0       22
4.0       12
5.0       14
6.0        6
7.0        1
8.0        1
10.0       8
50.0       1
400.0      1
2100.0     3
dtype: int64

In [43]:
values = [0,5, land_quantity.index[-4]]
values_female = [0,5, land_quantity_female.index[-3]]
values_femHoHH = [0,5, land_quantity_femHoHH.index[-2]]
values_hand = [0,5, land_quantity_hand_yes.index[-2]]
print land_quantity
per2 = get_percentiles(land_quantity, values)
per_hand = get_percentiles(land_quantity_hand_yes, values_hand)
percentiles = pd.Series(values, index = per2)
percentiles_hand = pd.Series(values_hand, index = per_hand)

print percentiles_hand

print percentiles

print pd.Series(values_female, index = get_percentiles(land_quantity_female, values_female))
print pd.Series(values_femHoHH, index = get_percentiles(land_quantity_femHoHH, values_femHoHH))

plot_wealth_criteria(land_quantity, values, "Number of feddans per wealth group")
#plot_wealth_criteria(land_quantity_female, values_female, "Number of feddans per wealth group in female respondents")
plot_wealth_criteria(land_quantity_femHoHH, values_femHoHH, "Number of feddans per wealth group in female HoHH")
plot_wealth_criteria(land_quantity_hand_yes, values_hand, "Number of feddans per wealth group in handicapped")

agri_gard_land_quantity
0.0       37
1.0       52
2.0       54
3.0       22
4.0       12
5.0       14
6.0        6
7.0        1
8.0        1
10.0       8
50.0       1
400.0      1
2100.0     3
dtype: int64
0.107143     0.0
0.892857     5.0
0.964286    10.0
dtype: float64
0.174528     0.0
0.900943     5.0
0.976415    10.0
dtype: float64
0.081967     0.0
0.934426     5.0
0.975410    10.0
dtype: float64
0.229730     0.0
0.932432     5.0
0.986486    10.0
dtype: float64
agri_gard_land_quantity
1.0    24.528302
2.0    25.471698
3.0    10.377358
4.0     5.660377
5.0     6.603774
dtype: float64


agri_gard_land_quantity
1.0    17.567568
2.0    25.675676
3.0    12.162162
4.0     6.756757
5.0     8.108108
dtype: float64


agri_gard_land_quantity
1.0    10.714286
2.0    35.714286
3.0    14.285714
4.0    10.714286
5.0     7.142857
dtype: float64


In [44]:
quantiles = [.335, .915, 1]
q = goats.quantile(quantiles)
print "Quantiles:"
print q
goats1 = goats[goats < q[quantiles[0]]].dropna()
goats2 = goats[goats.between(q[quantiles[0]], q[quantiles[1]])].dropna()
goats3 = goats[goats.between(q[quantiles[1]], q[quantiles[2]])].dropna()
#fig = ff.create_distplot([goats1.values, goats2.values, goats3.values], ['poor', 'medium', 'rich'])
#iplot (fig)

Quantiles:
0.335     1.0
0.915     7.0
1.000    23.0
Name: livestock_number_goat, dtype: float64


In [45]:
use_vs_have = pd.crosstab(agri_set.agri_gard_land_quantity, agri_set.agri_gard_use_quantity).stack().reset_index(name='feddans')
#prueba.plot.scatter('livestock_number_goats', 'livestock_number_sheep', s=prueba.number * 10)
use_vs_have = use_vs_have[use_vs_have.feddans != 0]
use_vs_have = use_vs_have[use_vs_have.agri_gard_use_quantity <= 10]
use_vs_have = use_vs_have[use_vs_have.agri_gard_land_quantity <= 10]
#prueba = prueba[prueba.livestock_number_goat <= 100]
#prueba = prueba[prueba.livestock_number_sheep <= 100]
#display(prueba)
use_vs_have = {
    "layout": {
        "title": "Relation between the quantity of land owned and used"
    },
    "data": [
        {

            "x": use_vs_have.agri_gard_land_quantity,
            "y": use_vs_have.agri_gard_use_quantity,
            "mode": "markers",
            "marker": {
                "size": use_vs_have.feddans
            },
            "type": "scatter"
        },
        {

            "x": [0,10],
            "y": [0,10],
            "mode": "lines"
        }
    ]
}
iplot(use_vs_have)

In [46]:
# Inconsistencies
# print agri_set.loc[agri_set.agri_gard_water_harv_farmlands > agri_set.agri_gard_land_quantity]['agri_gard_water_harv_farmlands']
# print agri_set.loc[agri_set.agri_gard_water_harv_farmlands > agri_set.agri_gard_land_quantity]['agri_gard_land_quantity']

agri_set['perc_rainfed_farmlands'] = agri_set['agri_gard_water_harv_farmlands'] / agri_set['agri_gard_land_quantity']

# Estimation to correct the inconsistencies: All feddan have water harvesting access
feddans_med_poor = agri_set.loc[agri_set.agri_gard_use_quantity <= 5]['agri_gard_use_quantity'].sum()

print feddans_med_poor / agri_set.loc[agri_set.agri_gard_use_quantity <= 5, 'deviceid'].count()
print feddans_med_poor / total_surveys

#print agri_set.groupby('perc_rainfed_farmlands').size()
#print agri_set.groupby('agri_gard_farm_tech').size()
#print agri_set.groupby('agri_gard_water_harv_farmlands').size()
all_farm_rainfed = agri_set.loc[agri_set['perc_rainfed_farmlands'] == 1].groupby('agri_gard_use_quantity').size()
agri_set['agri_gard_water_harv_farmlands'].sum()
print agri_set.loc[agri_set.agri_gard_use_quantity > agri_set.agri_gard_land_quantity]['deviceid'].count()
print agri_set.loc[agri_set.agri_gard_land_property == 'own'].groupby('agri_gard_use_quantity').size()
print agri_set.loc[agri_set.agri_gard_land_property == 'rent'].groupby('agri_gard_use_quantity').size()
print agri_set.groupby('agri_gard_land_quantity').size()
print agri_set.groupby('cc_effects_sand_affected_farmlands').size()

agri_set.groupby('agri_gard_training').size()

1.90058479532
0.620229007634
17
agri_gard_use_quantity
0.0       1
1.0      24
2.0      27
3.0       7
4.0       4
5.0       5
6.0       3
8.0       1
10.0      4
400.0     1
dtype: int64
agri_gard_use_quantity
0.0        2
1.0       18
2.0       16
3.0       10
4.0        4
5.0        2
6.0        1
10.0       1
30.0       1
2100.0     2
dtype: int64
agri_gard_land_quantity
0.0       37
1.0       52
2.0       54
3.0       22
4.0       12
5.0       14
6.0        6
7.0        1
8.0        1
10.0       8
50.0       1
400.0      1
2100.0     3
dtype: int64
cc_effects_sand_affected_farmlands
0.0     139
1.0      37
2.0      21
3.0       5
4.0       3
5.0       2
6.0       3
10.0      1
40.0      1
dtype: int64


agri_gard_training
no     203
yes      6
dtype: int64

In [47]:
agri_set_med_poor = agri_set.loc[agri_set.agri_gard_use_quantity <= 5].copy()
agri_set_med_poor.loc[agri_set_med_poor['agri_gard_protected_farmlands'] > agri_set_med_poor['agri_gard_use_quantity'], 'agri_gard_protected_farmlands'] = agri_set_med_poor['agri_gard_use_quantity']
agri_set_med_poor['perc_land_protected'] = agri_set_med_poor.agri_gard_protected_farmlands / agri_set_med_poor.agri_gard_land_quantity
print agri_set_med_poor.groupby('agri_gard_use_quantity').size()
print agri_set_med_poor['agri_gard_protected_farmlands'].sum() / agri_set_med_poor['agri_gard_use_quantity'].sum()
print agri_set_med_poor.groupby('perc_land_protected').size()
print agri_set_med_poor.groupby('perc_land_protected').size().sum()

agri_gard_use_quantity
0.0    11
1.0    60
2.0    63
3.0    18
4.0    10
5.0     9
dtype: int64
0.116923076923
perc_land_protected
0.000000    113
0.100000      1
0.142857      1
0.200000      1
0.333333      4
0.400000      3
0.500000      5
0.666667      1
0.750000      1
1.000000      5
inf           2
dtype: int64
137


In [48]:
#crop_types = ['sorghum', 'wheat', 'bean', 'palm', 'fodder', 'watermelon', 'potato', 'tomato', 'onion', 'cucumber', 'okra', 'other']

types_agri = get_ocurrences(agri_set, 'agri_gard_agri_crops')
types_agri = types_agri / agri_set['deviceid'].count()
print types_agri

crops = {
    "layout": {
        "title": "Main crops",
        "xaxis": {
            "tickangle": -45
        },
        "width": 700,
        "annotations": []
    },
    "data": [
        {
            "x": map(lambda x: x.capitalize(), types_agri.index.values),
            "y": types_agri.values,
            "type": "bar",
            "name": "Livelihoods Strategies",
            "marker": {
                "color": 'rgb(205,258,205)',
                "line": {
                    "color": 'rgb(0,192,0)',
                    "width": 1.5
                }
            },
        }
    ]
}

iplot(crops)

bean          0.891509
fodder        0.056604
okra          0.089623
onion         0.061321
cucumber      0.009434
other         0.051887
palm          0.084906
potato        0.061321
tomato        0.018868
watermelon    0.014151
sorghum       0.207547
wheat         0.575472
dtype: float64


In [49]:
farm_tools = reg_survey.groupby('agri_gard_farm_tools').size()
farm_tech = reg_survey.groupby('agri_gard_farm_tech').size()
seeds = reg_survey.groupby('agri_gard_seeds').size()

create_graphs_HoHH(lambda x: x.groupby('agri_gard_farm_tools').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Farming tools by Head of Household")

create_graphs_respondent(lambda x: x.groupby('agri_gard_farm_tools').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Farming tools by respondent")

In [50]:
create_graphs_HoHH(lambda x: x.groupby('agri_gard_farm_tech').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Farming tools by Head of Household")

create_graphs_respondent(lambda x: x.groupby('agri_gard_farm_tech').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Farming tools by respondent")

In [51]:
create_graphs_HoHH(lambda x: x.groupby('agri_gard_seeds').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Farming tools by Head of Household")

create_graphs_respondent(lambda x: x.groupby('agri_gard_seeds').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Farming tools by respondent")

In [52]:
crops = get_ocurrences(agri_set, 'agri_gard_agri_crops')
print crops

bean          189
fodder         12
okra           19
onion          13
cucumber        2
other          11
palm           18
potato         13
tomato          4
watermelon      3
sorghum        44
wheat         122
dtype: int64


### Forestry (charcoal production)

In [53]:
forestry_set = reg_survey.loc[reg_survey['src_incomes'].str.contains("forestry")]
print forestry_set.groupby('forestry_forestry_type').size()
print get_ocurrences(forestry_set, 'forestry_forestry_type')
forestry_participants =  forestry_set.groupby('forestry_participants').size()
#forestry_set.loc[forestry_set['forestry_participants'].str.contains('women')][['forestry_participants', 'respondant_gender', 'respondant_age', 'respondant_head_hh', 'male_teenager', 'male_adult', 'male_elderly', 'labour_sector']]
forestry_land_property = forestry_set.groupby('forestry_land_property').size()

print forestry_participants
print forestry_land_property

forestry_forestry_type
charcoal fuelwood    1
dtype: int64
charcoal    1
fuelwood    1
dtype: int64
forestry_participants
men    1
dtype: int64
forestry_land_property
community    1
dtype: int64


In [54]:
def create_2pie_graph(serie1, serie2, title, title1, title2):
    fig = {
        "layout": {
            "title": title,
            "annotations": [
                {
                    "font": {
                        "size": 14
                    },
                    "showarrow": False,
                    "text": title1,
                    "x": .20,
                    "y": 1.1
                },
                {
                    "font": {
                        "size": 14
                    },
                    "showarrow": False,
                    #"text": title2 + " (" + str(sum_func(data_global)) + ")",
                    "text": title2,
                    "x": .8,
                    "y": 1.1 
                }
            ]
        },
        "data": [
            {
                "values": serie1.values,
                "labels": serie1.index.values,
                "type": "pie",
                "sort": False,
                "domain": {
                    "x": [.07, .43]
                },
            },
            {
                "values": serie2.values,
                "labels": serie2.index.values,
                "type": "pie",
                "sort": False,
                "domain": {
                    "x": [.57, .93]
                },
            }
        ]
    }

    iplot(fig)
    
create_2pie_graph(agri_set.groupby('agri_gard_training').size(), labour_set.groupby('labour_training').size(), "Population who has received training in their livelihoods strategies", "Farmers", "Laborers")

### Other

In [55]:
other_set = reg_survey.loc[reg_survey['src_incomes'].str.contains("other")]
other_set.groupby('other_training').size()


other_training
no     89
yes    10
dtype: int64

### Coping strategies

In [56]:
cop_strats = get_ocurrences(reg_survey, 'coping_strat')
cop_strats = cop_strats * 100 / total_surveys

fig_cop_strats = {
    "layout": {
        "title": "Coping Strategies",
        "xaxis": {
            "tickangle": -45
        },
        "yaxis": {
            "ticksuffix": "%"
        },
        "width": 700,
        "annotations": []
    },
    "data": [
        {
            "x": map(lambda x: x.capitalize(), cop_strats.index.values),
            "y": cop_strats.values,
            "type": "bar",
            "name": "Livelihoods Strategies",
            "marker": {
                "color": 'rgb(205,205,258)',
                "line": {
                    "color": 'rgb(0,0,192)',
                    "width": 1.5
                }
            },
        }
    ]
}

iplot(fig_cop_strats)

## Climate change effects

In [57]:
cc_effects_on_rain = get_ocurrences(reg_survey, 'changed_rainy_season')

print cc_effects_on_rain

decreased         325
season_changed     45
unsteady          134
increased          49
no_changes         14
unpredictable       1
dtype: int64


In [58]:
sand_mov_sums = reg_survey.groupby('sand_movements').size()
wind_changes = get_ocurrences(reg_survey, 'sand_movements')

print wind_changes
print sand_mov_sums

create_graphs_respondent(lambda x: x.groupby('sand_movements').size(),
                         lambda x: x.values,
                         lambda x: x.index.values,
                         lambda x: x.sum(),
                         "Respondents asked about wind changes in las ten years")

less_windy        22
more_windy       482
no_changes        15
unpredictable      5
dtype: int64
sand_movements
less_windy                22
more_windy               481
no_changes                14
no_changes more_windy      1
unpredictable              5
dtype: int64


In [59]:
agri_set_med_poor.loc[agri_set_med_poor['cc_effects_sand_affected_farmlands'] > agri_set_med_poor['agri_gard_use_quantity'], 'cc_effects_sand_affected_farmlands'] = agri_set_med_poor['agri_gard_use_quantity']
agri_set_med_poor['perc_land_affected'] = agri_set_med_poor.cc_effects_sand_affected_farmlands / agri_set_med_poor.agri_gard_land_quantity
print agri_set_med_poor['cc_effects_sand_affected_farmlands'].sum() / agri_set_med_poor['agri_gard_use_quantity'].sum()
print agri_set_med_poor.groupby('perc_land_affected').size()
print agri_set_med_poor.groupby('perc_land_affected').size().sum()

0.292307692308
perc_land_affected
0.000000    98
0.200000     4
0.250000     1
0.285714     1
0.333333     7
0.400000     2
0.500000    11
0.666667     5
0.750000     1
1.000000    17
inf         10
dtype: int64
157


In [60]:
temps_changes = get_ocurrences(reg_survey, 'temps_change')

print temps_changes

decreased         50
unsteady         207
increased        267
no_changes         1
unpredictable      2
dtype: int64


In [61]:
def perc_ocurrences(survey, column, title, color = 'red'):
    marker_color = 'rgb(258,225,225)'
    line_color = 'rgb(192,0,0)'
    if (color == 'green'):
        marker_color = 'rgb(25,158,25,0.6)'
        line_color = 'rgb(0,70,0)'
    
    ocurrences = get_ocurrences(survey, column).sort_values(ascending = False)
    ocur_perc = 100*ocurrences/survey['deviceid'].size
    ocur_perc = ocur_perc.round(2)
    fig = {
        "layout": {
            "title": title,
            "xaxis": {
                "tickangle": -45
            },
            "width": 700,
            "annotations": []
        },
        "data": [
            {
                "x": map(lambda x: x.capitalize(), ocur_perc.index.values),
                "y": ocur_perc.values,
                "type": "bar",
                "marker": {
                    "color": marker_color,
                    "line": {
                        "color": line_color,
                        "width": 1.5
                    }
                },
            }
        ]
    }
    
    
    for i in range(0, len(ocurrences)):
        fig["layout"]["annotations"].append(
            {
                "x": map(lambda x: x.capitalize(), ocur_perc.index.values)[i],
                "y": ocur_perc.values[i] + 1.7,
                "text": str(ocur_perc.values[i]) + "%",
                "font": dict(
                    family='Arial',
                    size=14,
                    color='rgba(0, 100, 0, 1)'
                ),
                "showarrow": False,
    
            }
        )
    
    iplot(fig)

perc_ocurrences(livestock_set, 'cc_effects_grassland_availability', 'Prueba', 'green')


In [62]:
perc_ocurrences(reg_survey, 'cea_info_source', "Main information sources")

In [63]:
perc_ocurrences(reg_survey, 'cea_trusted_sources', "More trusted information sources")

In [64]:
perc_ocurrences(reg_survey, 'cea_info_needs', "Information needs")

In [65]:
perc_ocurrences(reg_survey, 'cea_org_feedback', "Channel to communicate with SRCS")