# NumPy/SciPy

In [200]:
import numpy as np

## [Arrays](https://numpy.org/doc/stable/reference/generated/numpy.array.html)

In [33]:
a = np.array([1, 2, 3, 4])  # build a 1-D NumPy array from a Python list
# type(a)  # uncomment to inspect the type of the object
a

array([1, 2, 3, 4])

In [28]:
b = np.array([[1, 2, 3, 4],[5, 6, 7, 8]])  # 2-D array built from a list of two equal-length rows
b

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [29]:
b.shape  # dimensions as a tuple; for this 2-D array: (rows, columns)

(2, 4)

In [36]:
np.convolve(a,a)  # discrete linear convolution of a with itself; the default mode returns len(a) + len(a) - 1 = 7 values

array([ 1,  4, 10, 20, 25, 24, 16])

In [44]:
np.sum(a)  # sum over all elements of a

10

In [45]:
np.mean(a)  # arithmetic mean of all elements of a

2.5

In [46]:
np.std(a)  # standard deviation of the elements (population form, ddof=0, by default)

1.1180339887498949

In [56]:
c = np.array([[8,4],[3,9],[3,6],[9,0]])  # 4 rows x 2 columns
c.shape

(4, 2)

In [54]:
np.dot(b,c)  # for 2-D inputs np.dot is matrix multiplication: (2, 4) x (4, 2) -> (2, 2)

array([[ 59,  40],
       [151, 116]])

In [63]:
def myfunc(a, b):
    """Return ``a - b`` when ``a`` is strictly greater than ``b``, otherwise ``a + b``."""
    return a - b if a > b else a + b

myfunc(2, 3)

5

In [64]:
# np.vectorize wraps the scalar function so it is applied element by element with
# broadcasting. Per the NumPy docs it is a convenience wrapper (essentially a loop),
# not a performance optimisation.
vfunc = np.vectorize(myfunc)
# The scalar 2 is broadcast against every element of the list.
vfunc([1, 2, 3, 4], 2)

array([3, 4, 1, 2])

## [Lists](https://docs.python.org/2/tutorial/datastructures.html)

In [11]:
# A plain Python list mixing floats and ints. The variable is deliberately NOT
# called `list`: that would shadow the builtin and break any later `list(...)` call
# in the same kernel.
values = [66.25, 333, 333, 1, 1234.5]
# type(values)  # uncomment to inspect the type of the object
values

[66.25, 333, 333, 1, 1234.5]

## [Tuples](https://docs.python.org/2/tutorial/datastructures.html#tuples-and-sequences)

In [17]:
# A tuple is created by comma-separating values; the parentheses are optional.
# The variable is deliberately NOT called `tuple`: that would shadow the builtin
# and break any later `tuple(...)` call in the same kernel.
# NOTE(review): these values look like personal data (name, birth date, ID number);
# confirm they are fictitious or replace them with placeholders before sharing.
person = 'jose', 'angel', 'velasco', 18031991, '06268812R'
person

('jose', 'angel', 'velasco', 18031991, '06268812R')

# Pandas

In [65]:
import pandas as pd

### [Intro to data structures](https://pandas.pydata.org/pandas-docs/stable/dsintro.html)

## [Panel](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.Panel.html)

In [87]:
# NOTE: pd.Panel was deprecated in pandas 0.20 and removed in pandas 0.25, so this
# cell only runs on older pandas. For 3-D labelled data on current releases the
# pandas docs recommend a MultiIndex DataFrame or the xarray package.
# Builds a 2 (items) x 5 (dates) x 4 (columns) panel of standard-normal draws.
p = pd.Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
             major_axis = pd.date_range('1/1/2000', periods=5),
             minor_axis = ['A', 'B', 'C', 'D'])
p

<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 5 (major_axis) x 4 (minor_axis)
Items axis: Item1 to Item2
Major_axis axis: 2000-01-01 00:00:00 to 2000-01-05 00:00:00
Minor_axis axis: A to D

## [Series](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.html)

In [68]:
s = pd.Series([1,2,3,4],index=['a','b','c','d'])  # 1-D labelled array: values 1..4 indexed by the labels 'a'..'d'
s

a    1
b    2
c    3
d    4
dtype: int64

## [DataFrame](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html)

In [146]:
# Country facts keyed by column name; the lists are aligned by position.
# NOTE(review): the magnitudes suggest Area_km2 is in thousands of km2 and PIB_b in
# trillions rather than billions -- confirm the intended units.
data = dict(
    Country=['Spain', 'UK', 'Germany', 'France', 'Ireland', 'Norway'],
    Capital=['Madrid', 'London', 'Berlin', 'Paris', 'Dublin', 'Oslo'],
    Population_m=[46.56, 66.54, 82.67, 66.9, 6.57, 5.26],
    Area_km2=[505.99, 242.49, 357.17, 640.68, 84.42, 385.20],
    PIB_b=[1.252, 2.649, 3.494, 2.488, 0.217, 0.459],
)
# `columns` pins the column order explicitly instead of relying on dict ordering.
df = pd.DataFrame(
    data,
    columns=['Country', 'Capital', 'Population_m', 'Area_km2', 'PIB_b'],
)
df

Unnamed: 0,Country,Capital,Population_m,Area_km2,PIB_b
0,Spain,Madrid,46.56,505.99,1.252
1,UK,London,66.54,242.49,2.649
2,Germany,Berlin,82.67,357.17,3.494
3,France,Paris,66.9,640.68,2.488
4,Ireland,Dublin,6.57,84.42,0.217
5,Norway,Oslo,5.26,385.2,0.459


In [147]:
df.index  # row labels of the frame

RangeIndex(start=0, stop=6, step=1)

In [148]:
df.loc[:,['Country']]  # label-based selection: all rows, a list of columns -> the result stays a DataFrame

Unnamed: 0,Country
0,Spain
1,UK
2,Germany
3,France
4,Ireland
5,Norway


In [149]:
df.loc[[2],['Country']]  # lists for both rows and columns -> a 1x1 DataFrame for row label 2

Unnamed: 0,Country
2,Germany


In [150]:
df[['Country']]  # indexing with a list of column names also returns a DataFrame

Unnamed: 0,Country
0,Spain
1,UK
2,Germany
3,France
4,Ireland
5,Norway


In [151]:
df.loc[df.Country=='Spain']  # boolean mask via attribute access: keep rows whose Country equals 'Spain'

Unnamed: 0,Country,Capital,Population_m,Area_km2,PIB_b
0,Spain,Madrid,46.56,505.99,1.252


In [152]:
df.loc[df['Country']=='Spain']  # same filter, using bracket access for the column

Unnamed: 0,Country,Capital,Population_m,Area_km2,PIB_b
0,Spain,Madrid,46.56,505.99,1.252


In [153]:
# Remove the row labelled 0 (Spain). drop() works on index labels, not positions.
# errors='ignore' keeps the cell re-runnable: on a second run label 0 is already
# gone and the default errors='raise' would raise a KeyError.
df = df.drop([0], errors='ignore')
df

Unnamed: 0,Country,Capital,Population_m,Area_km2,PIB_b
1,UK,London,66.54,242.49,2.649
2,Germany,Berlin,82.67,357.17,3.494
3,France,Paris,66.9,640.68,2.488
4,Ireland,Dublin,6.57,84.42,0.217
5,Norway,Oslo,5.26,385.2,0.459


In [154]:
df.loc[df['Country']=='Spain']  # no row has Country == 'Spain' at this point, so the result is an empty DataFrame

Unnamed: 0,Country,Capital,Population_m,Area_km2,PIB_b


In [155]:
df.head()  # first rows of the frame (5 by default)

Unnamed: 0,Country,Capital,Population_m,Area_km2,PIB_b
1,UK,London,66.54,242.49,2.649
2,Germany,Berlin,82.67,357.17,3.494
3,France,Paris,66.9,640.68,2.488
4,Ireland,Dublin,6.57,84.42,0.217
5,Norway,Oslo,5.26,385.2,0.459


In [159]:
df.T  # transpose: row labels become column labels and vice versa

Unnamed: 0,1,2,3,4,5
Country,UK,Germany,France,Ireland,Norway
Capital,London,Berlin,Paris,Dublin,Oslo
Population_m,66.54,82.67,66.9,6.57,5.26
Area_km2,242.49,357.17,640.68,84.42,385.2
PIB_b,2.649,3.494,2.488,0.217,0.459


In [160]:
df.stack()  # move the column labels into an inner index level -> Series indexed by (row label, column name)

1  Country              UK
   Capital          London
   Population_m      66.54
   Area_km2         242.49
   PIB_b             2.649
2  Country         Germany
   Capital          Berlin
   Population_m      82.67
   Area_km2         357.17
   PIB_b             3.494
3  Country          France
   Capital           Paris
   Population_m       66.9
   Area_km2         640.68
   PIB_b             2.488
4  Country         Ireland
   Capital          Dublin
   Population_m       6.57
   Area_km2          84.42
   PIB_b             0.217
5  Country          Norway
   Capital            Oslo
   Population_m       5.26
   Area_km2          385.2
   PIB_b             0.459
dtype: object

In [161]:
df.unstack()  # with a single-level row index this yields a Series indexed by (column name, row label)

Country       1         UK
              2    Germany
              3     France
              4    Ireland
              5     Norway
Capital       1     London
              2     Berlin
              3      Paris
              4     Dublin
              5       Oslo
Population_m  1      66.54
              2      82.67
              3       66.9
              4       6.57
              5       5.26
Area_km2      1     242.49
              2     357.17
              3     640.68
              4      84.42
              5      385.2
PIB_b         1      2.649
              2      3.494
              3      2.488
              4      0.217
              5      0.459
dtype: object

In [163]:
# Reshape: one row per Capital, one column per distinct PIB_b value, cells taken from
# Population_m. Combinations that do not occur in the data are left as NaN.
df.pivot(index='Capital', columns='PIB_b', values='Population_m')

PIB_b,0.217,0.459,2.488,2.649,3.494
Capital,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Berlin,,,,,82.67
Dublin,6.57,,,,
London,,,,66.54,
Oslo,,5.26,,,
Paris,,,66.9,,


In [214]:
# 3x3 frame of standard-normal draws. No seed is set in this cell, so the values
# change on every run unless the generator was seeded elsewhere.
df = pd.DataFrame(np.random.randn(3, 3))
df

Unnamed: 0,0,1,2
0,-1.526987,-1.085641,1.351158
1,1.971941,-0.574352,0.856335
2,0.089385,-1.441285,-0.769261


In [215]:
# applymap calls the function on every element; the result holds 2-decimal strings,
# not floats. NOTE: pandas 2.1+ deprecates applymap in favour of DataFrame.map.
df.applymap(lambda x: "%.2f" % x)

Unnamed: 0,0,1,2
0,-1.53,-1.09,1.35
1,1.97,-0.57,0.86
2,0.09,-1.44,-0.77


In [216]:
# apply() runs np.sqrt on each column and returns a new DataFrame; negative entries
# come back as NaN (NumPy emits a RuntimeWarning for them).
# The result is bound to a new name rather than back onto `df`, so re-running the
# cell always takes the root of the same input instead of the root of a root.
df_sqrt = df.apply(np.sqrt)
df_sqrt

Unnamed: 0,0,1,2
0,,,1.162393
1,1.404258,,0.925384
2,0.298973,,


In [222]:
# Fresh 3x3 frame of standard-normal draws for the summary-statistics examples.
# No seed is set in this cell, so the values change on every run unless the
# generator was seeded elsewhere.
df = pd.DataFrame(np.random.randn(3, 3))
df

Unnamed: 0,0,1,2
0,1.52913,0.073172,1.23354
1,-0.855979,1.438825,0.426753
2,-1.963364,-1.777128,2.181506


In [225]:
df.max()  # maximum of each column, returned as a Series indexed by column label

0    1.529130
1    1.438825
2    2.181506
dtype: float64

In [226]:
df.min()  # minimum of each column, returned as a Series indexed by column label

0   -1.963364
1   -1.777128
2    0.426753
dtype: float64

In [227]:
df.describe()  # per-column summary statistics: count, mean, std, min, quartiles and max

Unnamed: 0,0,1,2
count,3.0,3.0,3.0
mean,-0.430071,-0.088377,1.2806
std,1.784776,1.614051,0.878322
min,-1.963364,-1.777128,0.426753
25%,-1.409672,-0.851978,0.830147
50%,-0.855979,0.073172,1.23354
75%,0.336575,0.755998,1.707523
max,1.52913,1.438825,2.181506


In [257]:
# Two independent 3x3 frames of standard-normal draws (df1 is drawn first, then df2).
df1, df2 = (pd.DataFrame(np.random.randn(3, 3)) for _ in range(2))
# Stack them vertically; each frame keeps its own 0..2 row labels, so the labels
# repeat in the result.
df3 = pd.concat([df1, df2], axis=0)
df3

Unnamed: 0,0,1,2
0,-0.153096,0.94702,0.120566
1,0.44115,-0.532233,0.486108
2,-0.35186,0.223461,2.061292
0,-0.914761,-0.187499,-1.98981
1,0.187215,-0.527677,0.990698
2,-0.545015,-1.871912,-0.057383


In [258]:
# reset_index installs a fresh 0..n-1 index and keeps the previous (repeated) labels
# as a new 'index' column; passing drop=True would discard them instead.
df3 = df3.reset_index()
df3

Unnamed: 0,index,0,1,2
0,0,-0.153096,0.94702,0.120566
1,1,0.44115,-0.532233,0.486108
2,2,-0.35186,0.223461,2.061292
3,0,-0.914761,-0.187499,-1.98981
4,1,0.187215,-0.527677,0.990698
5,2,-0.545015,-1.871912,-0.057383


In [259]:
# Remove the 'index' column (axis=1 selects columns rather than rows).
# errors='ignore' keeps the cell re-runnable: on a second run the column is already
# gone and the default errors='raise' would raise a KeyError.
df3 = df3.drop('index', axis=1, errors='ignore')
df3

Unnamed: 0,0,1,2
0,-0.153096,0.94702,0.120566
1,0.44115,-0.532233,0.486108
2,-0.35186,0.223461,2.061292
3,-0.914761,-0.187499,-1.98981
4,0.187215,-0.527677,0.990698
5,-0.545015,-1.871912,-0.057383
