# Numpy Basics

In [26]:
import numpy as np

In [10]:
# Evenly spaced values in the half-open interval [2, 6) with a 0.3 step.
# With a non-integer step the element count is ceil((stop - start) / step),
# so floating-point rounding can decide whether a value next to `stop` is
# included; prefer np.linspace when the exact number of points matters.
a = np.arange(start=2, stop=6, step=0.3)
print(a)

[ 2.   2.3  2.6  2.9  3.2  3.5  3.8  4.1  4.4  4.7  5.   5.3  5.6  5.9]


In [13]:
# Show the 14 values as a 2x7 matrix. The result is only displayed, not
# assigned, so `a` itself keeps its 1-D shape.
a.reshape(2, 7)

array([[ 2. ,  2.3,  2.6,  2.9,  3.2,  3.5,  3.8],
       [ 4.1,  4.4,  4.7,  5. ,  5.3,  5.6,  5.9]])

In [15]:
# Eight evenly spaced samples from 1 to 10; unlike np.arange, the stop value
# is included and the number of points is given directly.
b = np.linspace(start=1, stop=10, num=8)
print(b)

[  1.           2.28571429   3.57142857   4.85714286   6.14285714
   7.42857143   8.71428571  10.        ]


In [18]:
# 3x4 array of uniform random samples from [0, 1). No seed is set in the
# cells shown here, so the printed values change on every run.
c = np.random.rand(3 ,4)
print(c)

[[ 0.02146039  0.31106976  0.34728568  0.37436774]
 [ 0.87846647  0.57713046  0.46154371  0.91077553]
 [ 0.18704243  0.7297342   0.20808856  0.01053344]]


In [20]:
# Arithmetic on equally shaped arrays works element by element:
# c[i] == a[i] - b[i].
a, b = np.array([34, 12, 3, 4]), np.array([2, 3, 5, 6])
c = a - b
print(c)

[32  9 -2 -2]


In [22]:
# np.empty only allocates the 3x4 buffer and does not initialise it, so the
# printed values are whatever happened to be in that memory and must not be
# relied on. Use np.zeros or np.ones when defined contents are needed.
a = np.empty(shape=(3, 4))
print(a)

[[ 0.02146039  0.31106976  0.34728568  0.37436774]
 [ 0.87846647  0.57713046  0.46154371  0.91077553]
 [ 0.18704243  0.7297342   0.20808856  0.01053344]]


## Create mask

In [27]:
# The integers 0..9999; NumPy abbreviates the printout of long arrays.
n = np.arange(0, 10000)
print(n)

[   0    1    2 ..., 9997 9998 9999]


Create the mask: a boolean array that is `True` wherever the condition holds.

In [28]:
# Element-wise comparisons yield boolean arrays; `&` combines them element by
# element (each comparison is parenthesised because `&` binds tighter than
# `<` and `>`).
mask = (20 < n) & (n < 44)
print(mask)

[False False False ..., False False False]


In [29]:
# Boolean indexing: keep only the elements of `n` whose mask entry is True
# (21 through 43 here).
n[mask]

array([21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
       38, 39, 40, 41, 42, 43])

## Array manipulation

In [31]:
# Twelve consecutive integers arranged row by row into 3 rows x 4 columns.
a = np.arange(12).reshape((3, 4))
print(a)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [37]:
# Flatten to 1-D, reading the rows one after another.
a.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [36]:
# Same 12 values laid out as 4 rows x 3 columns; the result is displayed
# only, `a` is not reassigned.
a.reshape(4, 3)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [35]:
# `a` still has its 3x4 shape: ravel() and reshape() return new array
# objects rather than changing the shape of `a`.
print(a)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


### Concatenate arrays

In [41]:
# 2x2 block holding 0..3.
a = np.arange(4).reshape((2, 2))
print(a)

[[0 1]
 [2 3]]


In [44]:
# 2x2 block holding 5..8 (the step of np.arange defaults to 1).
b = np.arange(5, 9).reshape((2, 2))
print(b)

[[5 6]
 [7 8]]


In [46]:
# axis=0 stacks `b` underneath `a` (rows are appended): 2x2 + 2x2 -> 4x2.
np.concatenate((a, b), axis=0)

array([[0, 1],
       [2, 3],
       [5, 6],
       [7, 8]])

In [49]:
# axis=1 places `b` to the right of `a` (columns are appended): 2x2 + 2x2 -> 2x4.
c = np.concatenate((a, b), axis=1)
print(c)

[[0 1 5 6]
 [2 3 7 8]]


# Pandas

In [62]:
import pandas as pd
from pandas import *

In [57]:
# Short alias for NumPy's standard-normal sampler.
randn = np.random.randn

In [64]:
# 10x4 frame of standard-normal samples with labelled columns.
# DataFrame is referenced through the `pd` alias so the cell does not depend
# on the wildcard `from pandas import *` for the name.
df = pd.DataFrame(randn(10, 4), columns=['A', 'B', 'C', 'D'])
print(df)

          A         B         C         D
0 -0.788776  1.045616  0.281394 -0.869459
1  0.053010 -0.695406  0.151927 -1.656542
2 -1.952594  0.383680  0.392038 -0.220844
3  0.500809  0.448878 -2.191599  1.793602
4 -0.640844 -0.509546  0.775417  1.172966
5  0.280988  1.024044  1.676121 -0.486726
6  0.638725 -0.230600 -0.743270  0.140924
7  0.387860  0.339913 -1.031407  0.732620
8  0.208936  1.533619  0.909357  1.007017
9  1.190449  0.330294  1.004463  0.240644


In [66]:
# 7x3 frame of standard-normal samples; it has fewer rows and no column 'D'
# compared with the 10x4 frame, which matters when the two are combined.
# DataFrame is referenced through the `pd` alias so the cell does not depend
# on the wildcard `from pandas import *` for the name.
df2 = pd.DataFrame(randn(7, 3), columns=['A', 'B', 'C'])
print(df2)

          A         B         C
0  0.436813  0.546561 -0.196205
1  0.189591  0.440861 -1.470778
2  0.956239 -0.121441 -1.213886
3 -0.923074  2.270614 -0.558540
4  0.341561  2.750024 -0.535973
5  0.100452 -1.177863  2.255604
6 -0.448996  0.346876 -1.682499


In [67]:
# Addition aligns on row index and column labels. Rows 7-9 and column D exist
# only in `df`, so those cells have no partner and come out as NaN.
df + df2

Unnamed: 0,A,B,C,D
0,-0.351962,1.592177,0.08519,
1,0.2426,-0.254545,-1.318851,
2,-0.996354,0.262239,-0.821848,
3,-0.422266,2.719492,-2.750139,
4,-0.299283,2.240479,0.239443,
5,0.381441,-0.153819,3.931724,
6,0.189729,0.116276,-2.425769,
7,,,,
8,,,,
9,,,,


In [68]:
# A scalar is broadcast to every cell: each value of `df2` is multiplied by 5.
df2 * 5

Unnamed: 0,A,B,C
0,2.184067,2.732806,-0.981024
1,0.947953,2.204307,-7.35389
2,4.781196,-0.607207,-6.069428
3,-4.615371,11.35307,-2.7927
4,1.707804,13.750122,-2.679866
5,0.502261,-5.889317,11.278018
6,-2.244978,1.73438,-8.412494


In [69]:
# Summary of the frame: index range, per-column non-null counts and dtypes,
# and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
A    10 non-null float64
B    10 non-null float64
C    10 non-null float64
D    10 non-null float64
dtypes: float64(4)
memory usage: 400.0 bytes


# Caso práctico

In [72]:
# Five integer values, each labelled with a place name through the index.
Sueldos_hombres = pd.Series(
    data=[2500, 1800, 1900, 2000, 2100],
    index=["Euskadi", "Murcia", "Madrid", "Barcelona", "Zaragoza"],
)
print(Sueldos_hombres)

Euskadi      2500
Murcia       1800
Madrid       1900
Barcelona    2000
Zaragoza     2100
dtype: int64


In [73]:
# Five integer values, each labelled with a place name through the index.
Sueldos_mujeres = pd.Series(
    data=[2300, 1600, 1980, 1900, 2150],
    index=["Euskadi", "Murcia", "Madrid", "Barcelona", "Zaragoza"],
)
print(Sueldos_mujeres)

Euskadi      2300
Murcia       1600
Madrid       1980
Barcelona    1900
Zaragoza     2150
dtype: int64


In [74]:
# Element-wise difference, matched by index label. Negative values mean the
# `Sueldos_mujeres` figure is lower than the `Sueldos_hombres` one.
diff = Sueldos_mujeres - Sueldos_hombres
print(diff)

Euskadi     -200
Murcia      -200
Madrid        80
Barcelona   -100
Zaragoza      50
dtype: int64


In [78]:
# Keep only the entries where the gap is strictly below -100; an entry equal
# to -100 (Barcelona here) is not selected.
# NOTE(review): use `<=` if a gap of exactly -100 should be included - confirm intent.
mask = (diff < -100)
print(diff[mask])

Euskadi   -200
Murcia    -200
dtype: int64


In [80]:
# Pair each key in `d` with the value at the same position in `c`.
d = list(range(1, 6))
c = list('abcde')
dict(zip(d, c))

{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e'}

In [81]:
# A negative slice start counts from the end: the last two elements.
L = list(range(1, 10))
L[-2:]

[8, 9]

In [1]:
import pandas as pd

In [2]:
# Input values 1..5 for the list comprehension in the next cell.
L = list(range(1, 6))

In [3]:
# Triangular numbers n(n+1)/2 for every n in L.
# n*(n+1) is always even, so floor division is exact and stays in integer
# arithmetic; int(n*(n+1)/2) goes through a float and loses precision once
# the product exceeds 2**53.
T = [n * (n + 1) // 2 for n in L]
print(T)

[1, 3, 6, 10, 15]
