## For Anne

First download and install numpy, seaborn, pandas and scipy.

Then simple exercises.

In [1]:
import numpy as np
import pandas as pd
import scipy
import sys
import math
from scipy.optimize import curve_fit
from lmfit import Model

# for plotting
import matplotlib.pyplot as plt
import matplotlib as mplib
from matplotlib import rc
import seaborn as sns

## 2. show matlplotlib output in notebook
%matplotlib inline

In [2]:
x = 5

### Types: Lists, numpy arrays, dictionaries

In [3]:
my_list = [1,2,3,4] 

In [4]:
my_list * 3

[1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]

For mathematical expressions use numpy

In [5]:
first_array = np.array(my_list)

In [6]:
print(first_array*3)

[ 3  6  9 12]


In [14]:
# Get the first element from an array
x = first_array[0]
print(x)

1


Dictionaries have key value pairs: The order of the keys change, so if you want an element call by key.

In [10]:
pop_unemp = {"Germany": [80000000, 4], "Belgium": [10000000, 20] }

In [11]:
pop_unemp["Germany"]

[80000000, 4]

### Writing a function

In [19]:
def doub_trip(x):
    double_x = x*2
    triple_x = x*3
    return double_x,triple_x

In [21]:
doub,trip = doub_trip(5)
print("double of 5 =",doub)
print("triple of 5 =",trip)

('double of 5 =', 10)
('triple of 5 =', 15)


#### FOR LOOP

In [63]:
for i in range(10):
    if i<11:
        j = i**2
        print(j)
    

0
1
4
9
16
25
36
49
64
81


## LOADING DATA

In [67]:
def read_data_in(file_in,column_list,lines):
    """Function to quickly read a number of lines from Excel and 
    rename the columns for each data"""
    indat = pd.read_excel(file_in)
    indat = indat.loc[0:lines]
    indat.columns = column_list
    return indat

In [36]:
# Setting column names
# External Debt data
ext_debt_col = ["Date","Total_Debt","Short","Long_4A","Long_State_Owned",\
                        "Long_4B","Private","Financial_Inst","Finance_woBanks",\
                        "Private_Invest","Commercial_Banks","Official","Governmental",\
                        "International","IMF","Other_international"]

ext_debt_in = read_data_in('EVDS_disborc.xlsx', ext_debt_col, 28)

#####
# original column names
print(ext_debt_in.iloc[0:2,:])

# after changin col names
ext_debt_in.columns = ext_debt_col
print(ext_debt_in.iloc[0:2,:])



  Tarih TP DB B01 TP DB B02  TP DB B19  TP DB B20  TP DB B42  TP DB B43  \
0  1989     43911      5745    38166.0    29446.0    38166.0    17780.0   
1  1990     52381      9500    42881.0    33268.0    42881.0    19807.0   

   TP DB B44  TP DB B45  TP DB B46  TP DB B47  TP DB B50  TP DB B51  \
0    10831.0     2639.0       28.0     8158.0    17825.0     9046.0   
1    12154.0     2823.0       22.0     9282.0    19758.0     9810.0   

   TP DB B56  TP DB B57  TP DB B60  
0     8779.0       48.0     3040.0  
1     9948.0        0.0     3838.0  
   Date Total_Debt Short  Long_4A  Long_State_Owned  Long_4B  Private  \
0  1989      43911  5745  38166.0           29446.0  38166.0  17780.0   
1  1990      52381  9500  42881.0           33268.0  42881.0  19807.0   

   Financial_Inst  Finance_woBanks  Private_Invest  Commercial_Banks  \
0         10831.0           2639.0            28.0            8158.0   
1         12154.0           2823.0            22.0            9282.0   

   Official 

### Subsetting pandas data frame

In [64]:
#version 1, to get an array
my_vec= ext_debt_in["Date"]
print("version1 = ", type(my_vec))

# version 2
my_vec2 = ext_debt_in[["Date"]]
print("version2 = ",type(my_vec2))

# version 3 // iloc
my_df1 = ext_debt_in.iloc[0:10,3:6]
print(my_df1.head(2))

('version1 = ', <class 'pandas.core.series.Series'>)
('version2 = ', <class 'pandas.core.frame.DataFrame'>)
   Long_4A  Long_State_Owned  Long_4B
0  38166.0           29446.0  38166.0
1  42881.0           33268.0  42881.0


### Nice things about PANDAS

Here, reading it new data to show something.


In [82]:
# Import - Export - Consumer Prices
imp_exp_col = ["Date","Living","Health","Education","Food_Drinks","Food",\
                        "Export","Import","Invest_export","Invest_import",\
                        "Consume_export","Consume_Import"]

imp_exp_in  = read_data_in('EVDS_ithalat_ihracat.xlsx', imp_exp_col, 149)
#imp_exp_in.info()
imp_exp_in.head(5)


Unnamed: 0,Date,Living,Health,Education,Food_Drinks,Food,Export,Import,Invest_export,Invest_import,Consume_export,Consume_Import
0,2005-12,126.06,111.75,140.69,116.08,115.7,7246.279,11682.578,752.18,2372.985,3404.019,1558.103
1,2006-01,127.66,112.47,140.73,117.61,117.28,5133.049,8145.534,501.481,1340.089,2210.229,844.001
2,2006-02,129.22,113.22,140.7,120.44,120.25,6058.251,9796.22,632.304,1583.855,2801.132,1294.246
3,2006-03,130.0,113.64,140.91,122.0,121.91,7411.102,11605.026,819.647,1960.095,3348.993,1516.396
4,2006-04,130.27,113.96,141.07,122.75,122.69,6456.09,11587.101,742.248,1932.196,2932.833,1425.378


In [83]:
# Let's clean the month info and keep years.

imp_exp_in["Date"] = imp_exp_in["Date"].str.split('-',n=1, expand = True)[0]
imp_exp_in.head(2)

Unnamed: 0,Date,Living,Health,Education,Food_Drinks,Food,Export,Import,Invest_export,Invest_import,Consume_export,Consume_Import
0,2005,126.06,111.75,140.69,116.08,115.7,7246.279,11682.578,752.18,2372.985,3404.019,1558.103
1,2006,127.66,112.47,140.73,117.61,117.28,5133.049,8145.534,501.481,1340.089,2210.229,844.001


In [84]:
imp_exp_in["Date"] = pd.to_numeric(imp_exp_in["Date"])

# APPLY: Great function to know
imp_exp_in[["Health","Living"]] = imp_exp_in[["Health","Living"]].apply(pd.to_numeric)




### AGGRGATE COLUMN WITH FUNCTION MEAN

In [91]:
imp_exp_in = imp_exp_in.groupby(["Date"]).mean()
#imp_exp_in = imp_exp_in.reset_index()
imp_exp_in.head()

# NOW LOC INSTEAD OF ILOC
subset = imp_exp_in.loc["2005":"2008","Health": "Education"]
subset.head()

Unnamed: 0_level_0,Health,Education
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2005,1,1
2006,1,1
2007,1,1
2008,1,1


In [90]:
# Getting date back from being an index to a column name
imp_exp_in = imp_exp_in.reset_index()
imp_exp_in.head()


Unnamed: 0,index,Date,Living,Health,Education,Food_Drinks,Food,Export,Import,Invest_export,Invest_import,Consume_export,Consume_Import
0,0,2005,126.06,111.75,140.69,116.08,115.7,7246.279,11682.578,752.18,2372.985,3404.019,1558.103
1,1,2006,135.184167,116.9525,145.649167,122.945,122.678333,7127.8895,11631.34725,785.280583,1945.629167,3149.206333,1343.027
2,2,2007,150.340833,122.533333,156.143333,138.210833,138.230833,8939.312,14171.892333,1146.211583,2254.535583,3641.321917,1557.837167
3,3,2008,179.866667,123.035,166.695833,155.884167,156.3125,11002.265833,16830.297417,1393.781083,2335.054917,3923.07525,1790.769167
4,4,2009,195.7325,126.655833,176.365833,168.3875,168.870833,8511.883833,11744.034583,926.402167,1788.568167,3394.418583,1607.475167


In [None]:
# write a subset to excel

In [None]:
imp_exp_in[imp_exp_in.columns] = imp_exp_in[imp_exp_in.columns]
