In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# What is pandas and Data structures

- pandas contanins data structures and data manipulation tools designed to make data cleaning and analysis fast and easy in Python.

- It is designed for working with tabular or heterogeneous data. Numpy, by contrast, is best suited for working with homogeneous numerical array data.

- Its two workhorse data structures are Series and DataFrame.

# Series

- One-dimensional array-like object containing a sequence of values (of similar types to Numpy types) and an associated array of data labels, called its index.

- A way to think about a Series is as a fixed-length, ordered dict, as it is a mapping of index values to data values.

In [29]:
obj = pd.Series([4, 7, -5, 3]) # Since we don't specify an index for the data, a default one consisting of the integers 0 through n-1 is created.
print(obj)
# print(obj.index)
# print(obj.values)

# Assignar indexs a posteriori
obj.index = [1, 2, 3, 4]

# Assignar indexs en definir
obj2 = pd.Series([4, 7, -5, 3], index = ["d", "b", "a", "c"])

# Create a Series from a dictionary
sdata = {"Ohio": 35000, "Texas": 71000, "Oregon": 16000, "Utah": 5000}
obj3 = pd.Series(sdata)
print(obj3)

# Redefinir indexs, canviant ordre, afegint, traient

# Opció 1, en la creació
states = ["California", "Ohio", "Oregon", "Texas"]
obj4 = pd.Series(sdata, index = states)
print(obj4)

# Opció 2, després en un nou objecte
obj5 = obj4.reindex(["California", "Oregon", "Ohio", "Utah"]) # Si no estava l'índex en l'anterior, col·loca NaN
#print(obj5)

obj6 = obj4.reindex(["California", "Oregon", "Ohio", "Utah"], method = "ffill") # Si hi ha valors NaN, interpola per poder posar algun valor. 
# fill_value = valor, si hi ha valors NaN hi posa aquest valor.
print(obj6)

# Series object and its index have a "name" attribute.
obj4.name = "population"
obj4.index.name = "state"
print(obj4)


0    4
1    7
2   -5
3    3
dtype: int64
Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64
California        NaN
Oregon        16000.0
Ohio          35000.0
Utah          71000.0
dtype: float64
state
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
Name: population, dtype: float64


# DataFrame

- A DataFrame represents a rectangular table of data and contains an ordered collection of columns, each of which can be a different value type (numeric, string, boolean, etc.). The DataFrame has both row and column index.

In [79]:
# OPCIÓ 1: From a dict of equal-length lists or ndarrays from Numpy

data = {"state": ["Ohio", "Ohio", "Ohio", "Nevada", "Nevada", "Nevada"],
"year": [2000, 2001, 2002, 2001, 2002, 2003],
"pop": [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}

frame = pd.DataFrame(data)
print(frame)

# Definir indexs, canviar ordre de les columnes en crear el DataFrame
frame2 = pd.DataFrame(data, index = ["one", "two", "three", "four", "five", "six"] , columns = ["year", "state", "pop"])
# print(frame2)

# Definir indexs a posteriori
frame22 = frame.reindex(index = ["one", "two", "three", "four", "five", "six"], columns=["year", "state", "pop", "crimes"], fill_value=0) # Si afegim una nova columna que tindrà valors NaN, podem usar argument fill_values per omplir-los.
print(frame22)

# frame1 = pd.DataFrame(np.arange(9.).reshape((3,3)), columns=list("bcd"), index=["Ohio", "Texas", "Colorado"]) #ndarrays

# Indexar una Serie de valors repetits a partir d'una Serie dels valors únics - get_indexer

srep = pd.Series(["c", "a", "b", "b", "c", "a"]) # els índexs ara són 0,1,2,3,4,5
unique = pd.Series(srep.unique()) #method unique a la secció Descriptive Statistics dels apunts.

arrindex = pd.Index(unique).get_indexer(srep)
srep2 = srep.reindex(index=arrindex)
print(srep2)



    state  year  pop
0    Ohio  2000  1.5
1    Ohio  2001  1.7
2    Ohio  2002  3.6
3  Nevada  2001  2.4
4  Nevada  2002  2.9
5  Nevada  2003  3.2
       year state  pop  crimes
one       0     0  0.0       0
two       0     0  0.0       0
three     0     0  0.0       0
four      0     0  0.0       0
five      0     0  0.0       0
six       0     0  0.0       0
0    c
1    a
2    b
2    b
0    c
1    a
dtype: object


In [75]:
# OPCIÓ 2: From a nested dict of dicts

popdata = {"Nevada": {2001: 2.4, 2002: 2.9}, "Ohio": {2000: 1.5, 2001: 1.7, 2002: 3.6}}
frame3 = pd.DataFrame(popdata, index = [2000, 2001, 2002]) # the outer dict keys are the columns and the inner keys are the rows indices. He afegit index = per ordenar els anys al meu gust.
print(frame3)

# index and columns have their name attributes

frame3.index.name = "year"
frame3.columns.name = "state"

print(frame3)

      Nevada  Ohio
2000     NaN   1.5
2001     2.4   1.7
2002     2.9   3.6
state  Nevada  Ohio
year               
2000      NaN   1.5
2001      2.4   1.7
2002      2.9   3.6


## Series: Selection

- isnull and notnull to detect missing data

In [73]:
obj2 = pd.Series([4, 7, -5, 3], index = ["d", "b", "a", "c"])

# Filtrar
cond = obj2[obj2 > 0] 
print(cond)

"b" in obj2 # True. Saber si un índex hi és en la Serie

mask = obj2.isin([3, 4])
print(mask) # True o False segons si és 3 o 4 o no.
print(obj2[mask])


# Slicing

print("Els dos últims:\n", obj2[-2:]) 
print("Els elements de b a c:\n", obj2["b":"c"]) # Slicing per índexs/label, INCLOU INICI I FI
print("L'element b és:", obj2["b"])


# Detectar missing data

sdata = {"Ohio": 35000, "Texas": 71000, "Oregon": 16000, "Utah": 5000}
states = ["California", "Ohio", "Oregon", "Texas"]
obj4 = pd.Series(sdata, index = states)

print(pd.isnull(obj4)) 


d    4
b    7
c    3
dtype: int64
d     True
b    False
a    False
c     True
dtype: bool
d    4
c    3
dtype: int64
Els dos últims:
 a   -5
c    3
dtype: int64
Els elements de b a c:
 b    7
a   -5
c    3
dtype: int64
L'element b és: 7
California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool


## DataFrame: Selection

In [23]:
data = {"state": ["Ohio", "Ohio", "Ohio", "Nevada", "Nevada", "Nevada"],
"year": [2000, 2001, 2002, 2001, 2002, 2003],
"pop": [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}

frame = pd.DataFrame(data)
print(frame)

print(frame.index) # Només els índexs de les files. En aquest cas, no definits, 0,1,2,3,4,5
print(frame.values) # Només els valors
print(frame.columns) # Només els índexs de les columnes
print(frame["state"]) # Seleccionar una columna
print(frame[0:2]) # Seleccionar les dues primeres columnes. Usar enters pot portar a errors entre índexs i posició. Millor usar iloc.
print(frame.loc[0]) # Seleccionar una fila by label
print(frame.loc[0:2, ["state", "pop"]]) # Seleccionar un element, o una combinació de files i columnes. Slicing per labels/índexs, INCLOU INICI I FI
print(frame.iloc[0, 1]) # Seleccionar mitjançant posició (integers) i no label/índex

#print(frame.head()) # to view only the first five rows

# Saber si una columna o fila hi és en la taula
"year" in frame.columns # True
6 in frame3.index # False

# Mostrar True/False si data compleix certes condicions
print(frame["pop"] < 2)

# Mostrar la data que compleix certes condicions
print(frame[["state", "pop"]][frame["pop"] < 2])

    state  year  pop
0    Ohio  2000  1.5
1    Ohio  2001  1.7
2    Ohio  2002  3.6
3  Nevada  2001  2.4
4  Nevada  2002  2.9
5  Nevada  2003  3.2
RangeIndex(start=0, stop=6, step=1)
[['Ohio' 2000 1.5]
 ['Ohio' 2001 1.7]
 ['Ohio' 2002 3.6]
 ['Nevada' 2001 2.4]
 ['Nevada' 2002 2.9]
 ['Nevada' 2003 3.2]]
Index(['state', 'year', 'pop'], dtype='object')
0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
5    Nevada
Name: state, dtype: object
  state  year  pop
0  Ohio  2000  1.5
1  Ohio  2001  1.7
state    Ohio
year     2000
pop       1.5
Name: 0, dtype: object
  state  pop
0  Ohio  1.5
1  Ohio  1.7
2  Ohio  3.6
2000
0     True
1     True
2    False
3    False
4    False
5    False
Name: pop, dtype: bool
  state  pop
0  Ohio  1.5
1  Ohio  1.7


## Series: Modifications

In [31]:
obj2 = pd.Series([4, 7, -5, 3], index = ["d", "b", "a", "c"])

# Eliminar un valor
# del obj2["a"]

# Nova Serie havent eliminat valors 
obje = obj2.drop(["b", "c"]) # usar ,inplace = True per aplicar-ho al propi objecte

print(obj2)

# Modificar un valor per assignació
obj2["d"] = 5
print(obj2)

# Afegir un nou valor
obj2["e"] = -1
print(obj2)


d    5
b    7
a   -5
c    3
dtype: int64
d    5
b    7
a   -5
c    3
dtype: int64
d    5
b    7
a   -5
c    3
e   -1
dtype: int64


## DataFrame: Modifications

In [15]:
popdata = {"Nevada": {2001: 2.4, 2002: 2.9}, "Ohio": {2000: 1.5, 2001: 1.7, 2002: 3.6}}
frame3 = pd.DataFrame(popdata, index = [2000, 2001, 2002]) 

# Modificar valors per nova assignació
frame3["Nevada"] = [2.1, 2.2, 2.8]
frame3["Ohio"][2000] = 1.3
print(frame3)

# Modificar tota una columna a partir d'una Serie. Si no es posen tots els indexs, posa NaN.
val = pd.Series([2.6, 3], index=[2001, 2002])
frame3["Nevada"] = val
print(frame3)

# Afegir una nova columna
frame3["California"] = [1, 2, 3]
print(frame3)

frame3["Exceed_Ohio"] = frame3["Ohio"] > 2.5 # Nova columna booleana
print(frame3)

# Eliminar una columna
del frame3["Nevada"]

# Crear un nou objecte sense alguna columna o fila
framec = frame3.drop("Ohio", axis = 1) # usar ,inplace = True per aplicar-ho al propi objecte
framef = frame3.drop(2000)

# Transposar el DataFrame
print(frame3.T)


      Nevada  Ohio
2000     2.1   1.3
2001     2.2   1.7
2002     2.8   3.6
      Nevada  Ohio
2000     NaN   1.3
2001     2.6   1.7
2002     3.0   3.6
      Nevada  Ohio  California
2000     NaN   1.3           1
2001     2.6   1.7           2
2002     3.0   3.6           3
      Nevada  Ohio  California  Exceed_Ohio
2000     NaN   1.3           1        False
2001     2.6   1.7           2        False
2002     3.0   3.6           3         True
              2000   2001  2002
Ohio           1.3    1.7   3.6
California       1      2     3
Exceed_Ohio  False  False  True
       Ohio  California  Exceed_Ohio
2000   True        True         True
2001   True       False         True
2002  False       False         True
2000    1.3
2001    1.7
Name: Ohio, dtype: float64


## Series: Operations

- You can use Numpy functions or Numpy-like operations.

- artihmetic methods: add (+), sub (-), div (/), floordiv (//), mul (*), pow (**). Si posem r al davant, radd, rsub... invertim ordre de l'operació: a - b equivalent a a.sub(b) equivalent a b.rsub(a).

In [8]:
obj2 = pd.Series([4, 7, -5, 3], index = ["d", "b", "a", "c"])
sdata = {"Ohio": 35000, "Texas": 71000, "Oregon": 16000, "Utah": 5000}
obj3 = pd.Series(sdata)
states = ["California", "Ohio", "Oregon", "Texas"]
obj4 = pd.Series(sdata, index = states)

print(obj2 * 2)

print(obj3 + obj4) # Sumar els valors que tenen el mateix índex, els que no pot sumar, els converteix en NaN.

print(np.abs(obj2)) # valor absolut. Funció Numpy.

f = lambda x : x - abs(x)
print(obj2.map(f)) # .map aplicar una funció a cada un dels elements.

d     8
b    14
a   -10
c     6
dtype: int64
California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64
d    4
b    7
a    5
c    3
dtype: int64
d     0
b     0
a   -10
c     0
dtype: int64


## DataFrame: Operations

- artihmetic methods: add (+), sub (-), div (/), floordiv (//), mul (*), pow (**). Si posem r al davant, radd, rsub... invertim ordre de l'operació: a - b equivalent a a.sub(b) equivalent a b.rsub(a).

- Numpy ufuncs also work with pandas objects,

In [39]:
df1 = pd.DataFrame(np.arange(9.).reshape((3,3)), columns=list("bcd"), index=["Ohio", "Texas", "Colorado"])

df2 = pd.DataFrame(np.arange(12.).reshape((4,3)), columns=list("bde"), index=["Utah", "Ohio", "Texas", "Oregon"])

print(df1)
print(df2)

print(df1 + df2) # Cal que índex de fila i columna coincideixin per poder sumar-los. Sinó, NaN.
print(df1.add(df2, fill_value=0)) # Es pot usar els methods per operar(add, sub, div...) i l'argument fill_value per si un element només està en una taula, assignar-li valor a l'altra taula per sumar. Si l'element no està en cap de les dues taules, seguirà apareixent NaN.

            b    c    d
Ohio      0.0  1.0  2.0
Texas     3.0  4.0  5.0
Colorado  6.0  7.0  8.0
          b     d     e
Utah    0.0   1.0   2.0
Ohio    3.0   4.0   5.0
Texas   6.0   7.0   8.0
Oregon  9.0  10.0  11.0
            b   c     d   e
Colorado  NaN NaN   NaN NaN
Ohio      3.0 NaN   6.0 NaN
Oregon    NaN NaN   NaN NaN
Texas     9.0 NaN  12.0 NaN
Utah      NaN NaN   NaN NaN
            b    c     d     e
Colorado  6.0  7.0   8.0   NaN
Ohio      3.0  1.0   6.0   5.0
Oregon    9.0  NaN  10.0  11.0
Texas     9.0  4.0  12.0   8.0
Utah      0.0  NaN   1.0   2.0


In [45]:
# OPERATIONS BETWEEN DATAFRAME AND SERIES
# Arithmetic between them matches the index of the Series on the DataFrame's columns, broadcasting down the rows.

frame = pd.DataFrame(np.arange(12.).reshape((4,3)), index=["Utah", "Ohio", "Texas", "Oregon"], columns=list("bde"))

series = frame.iloc[0] # La primera fila

print(frame)
print(series)

print(frame - series) # resta la fila series a totes les files de frame lligant b-b, d-d, e-e. Si hi hagués algun índex sense match, NaN.

# Si volem lligar índexs per files, i fer broadcast over the columns, cal usar els arithmetic methods: add, sub... i l'argument axis="index"

series2 = frame["d"] # la columna d
print(series2)

print(frame.sub(series2, axis="index")) # resta la columna d a totes les columnes de frame, lligant Utah, Ohio...


          b     d     e
Utah    0.0   1.0   2.0
Ohio    3.0   4.0   5.0
Texas   6.0   7.0   8.0
Oregon  9.0  10.0  11.0
b    0.0
d    1.0
e    2.0
Name: Utah, dtype: float64
          b    d    e
Utah    0.0  0.0  0.0
Ohio    3.0  3.0  3.0
Texas   6.0  6.0  6.0
Oregon  9.0  9.0  9.0
Utah       1.0
Ohio       4.0
Texas      7.0
Oregon    10.0
Name: d, dtype: float64
          b    d    e
Utah   -1.0  0.0  1.0
Ohio   -1.0  0.0  1.0
Texas  -1.0  0.0  1.0
Oregon -1.0  0.0  1.0


In [13]:
# FUNCTION APPLICATION AND MAPPING

frame = pd.DataFrame(np.random.randn(4,3), index=["Utah", "Ohio", "Texas", "Oregon"], columns=list("bde"))
print(frame)

#Numpy function
print(np.abs(frame)) 

# Applying a function on one-dimensional arrays to each column or row. - APPLY
f = lambda x: x.max() - x.min()

print(frame.apply(f)) # Aplica per columnes

print(frame.apply(f, axis="columns")) # aplica per files, és a dir, saltant columnes

def g(x) : # Un altre exemple
    return pd.Series([x.min(), x.max()], index=["min", "max"])

print(frame.apply(g))

# Element-wise Python functions - APPLYMAP

format = lambda x: "%.2f" % x # Formateja valors a floats amb dos decimals

print(frame.applymap(format))



               b         d         e
Utah   -0.619936  0.383376 -1.008632
Ohio   -0.371243  0.958097 -0.199604
Texas  -2.595658 -0.802796 -0.441220
Oregon  0.540372  0.002627 -0.636958
               b         d         e
Utah    0.619936  0.383376  1.008632
Ohio    0.371243  0.958097  0.199604
Texas   2.595658  0.802796  0.441220
Oregon  0.540372  0.002627  0.636958
b    3.136030
d    1.760894
e    0.809028
dtype: float64
Utah      1.392008
Ohio      1.329341
Texas     2.154439
Oregon    1.177330
dtype: float64
            b         d         e
min -2.595658 -0.802796 -1.008632
max  0.540372  0.958097 -0.199604
            b      d      e
Utah    -0.62   0.38  -1.01
Ohio    -0.37   0.96  -0.20
Texas   -2.60  -0.80  -0.44
Oregon   0.54   0.00  -0.64


## Series: Sorting and ranking

In [24]:
obj = pd.Series(range(4), index=["d", "a", "b", "c"])

print(obj.sort_index()) # Ordenat pels índexs

print(obj.sort_values()) # Ordenat pels valors

series = pd.Series([7, -5, 7, 4, 2, 0, 4, 4])

print(series.rank()) # amb el número 1, el valor més petit, i d'aquí en amunt. Si números repetits, se'ls otorga la mitjana de les posicions. Per exemple, els 4 estan en posició 4-5-6, per això els tres reben un 5. I els 7 estan en posició 7 i 8, per això reben un 7.5. Hi ha altres mètodes per valors repetits, com "first", "max", "min", "dense".

print(series.rank(ascending = False, method = "min")) # amb 1 el valor més gran. Si repetits, la posició menor per tots.


a    1
b    2
c    3
d    0
dtype: int64
d    0
a    1
b    2
c    3
dtype: int64
0    7.5
1    1.0
2    7.5
3    5.0
4    3.0
5    2.0
6    5.0
7    5.0
dtype: float64
0    1.0
1    8.0
2    1.0
3    3.0
4    6.0
5    7.0
6    3.0
7    3.0
dtype: float64


## DataFrame: Sorting and ranking

In [43]:
frame = pd.DataFrame(np.random.randn(4,4), index=[3, 2, 4, 1], columns=["d", "a", "b", "c"])
print(frame)

# print(frame.sort_index()) # Per índexs fila
# print(frame.sort_index(axis=1)) # Per índexs columna
# print(frame.sort_values(by="c", ascending=False)) # Per valors d'una columna, de gran a petit

print(frame.rank()) # Rang de petit a gran per columnes
print(frame.rank(axis = "columns", ascending=False)) # Rang de gran a petit per files

          d         a         b         c
3 -1.771059  0.115122  0.257853 -0.912060
2  0.139835 -0.203534  0.835837 -0.125360
4 -1.335450  0.823848  0.422093  0.123598
1 -0.759722  0.870042  0.266013 -0.078854
     d    a    b    c
3  1.0  2.0  1.0  1.0
2  4.0  1.0  4.0  2.0
4  2.0  3.0  3.0  4.0
1  3.0  4.0  2.0  3.0
     d    a    b    c
3  4.0  2.0  1.0  3.0
2  2.0  4.0  1.0  3.0
4  4.0  1.0  2.0  3.0
1  4.0  1.0  2.0  3.0


## Axis indexes with duplicate labels

All of the examples we've looked at have had unique axis labels. While many pandas functions, like reindex, require that the labels be unique, it's not mandatory.

The index's "is_unique" property can tell you whether its labels are unique or not.

## Descriptive Statistics

- Compared with the similar methods found on Numpy arrays, these ones have been built-in handling for missing data.

In [69]:
data = np.array([[1.4, np.nan], [7.1, -4.5], [np.nan, np.nan], [0.75, -1.3]])
df = pd.DataFrame(data, index=list("abcd"), columns=["one","two"])
print(df)

print(df.sum()) # NA values are excluded. Si volem que els contempli i per tant aparegui NaN en la suma, skipna=False.  # Si volem sumar les files, axis=1.

# print(df.idxmax()) # index del valor on es troba el màxim

# print(df.cumsum()) # Suma acumulativa

print(df.describe()) # taula amb un resum d'estadístiques

# Si les dades són no-numèriques mostra altres estadístiques, per exemple en aquesta Serie:

obj = pd.Series(["a", "a", "b", "c"] * 4)
print(obj.describe())

print(obj.unique()) # Valors únics de la Serie, com un Set
print(obj.value_counts()) # Freqüència de cada valor. # value_counts també està disponible com un method de pandas per ser usat en qualsevol array o seqüència

    one  two
a  1.40  NaN
b  7.10 -4.5
c   NaN  NaN
d  0.75 -1.3
one    9.25
two   -5.80
dtype: float64
            one       two
count  3.000000  2.000000
mean   3.083333 -2.900000
std    3.493685  2.262742
min    0.750000 -4.500000
25%    1.075000 -3.700000
50%    1.400000 -2.900000
75%    4.250000 -2.100000
max    7.100000 -1.300000
count     16
unique     3
top        a
freq       8
dtype: object
['a' 'b' 'c']
a    8
b    4
c    4
dtype: int64


In [81]:
# Usar value_counts en un DataFrame aplicant-ho a cada columna, per fer, per exemple, un histograma a posteriori

data = pd.DataFrame({"Qu1": [1, 3, 4, 3, 4], "Qu2": [2, 3, 1, 2, 3], "Qu3": [1, 5, 2, 4, 4]})
print(data)

compt = data.apply(pd.value_counts).fillna(0) # fillna: Si algun valor només està en una columna i per tant en les altres surt NaN ho omplirem per 0.

print(compt)

   Qu1  Qu2  Qu3
0    1    2    1
1    3    3    5
2    4    1    2
3    3    2    4
4    4    3    4
   Qu1  Qu2  Qu3
1  1.0  1.0  1.0
2  0.0  2.0  1.0
3  2.0  2.0  0.0
4  2.0  0.0  2.0
5  0.0  0.0  1.0


### Correlació i covariància

In [57]:
import pandas_datareader.data as web

In [62]:
all_data = {ticker: web.get_data_yahoo(ticker) for ticker in ["AAPL", "IBM", "MSFT", "GOOG"]}

price = pd.DataFrame({ticker: data["Adj Close"] for ticker, data in all_data.items()})

volume = pd.DataFrame({ticker: data["Volume"] for ticker, data in all_data.items()})

returns = price.pct_change() # Compute percent changes of the prices (it's a time series operation)

print(returns.tail())

                AAPL       IBM      MSFT      GOOG
Date                                              
2022-08-19 -0.015102 -0.005034 -0.013854 -0.022671
2022-08-22 -0.023029 -0.020380 -0.029355 -0.025821
2022-08-23 -0.002029 -0.005976 -0.004716 -0.002607
2022-08-24  0.001794 -0.011207 -0.002351 -0.000610
2022-08-25  0.007555  0.004916 -0.000073  0.013949


In [67]:
## Correlació/Covariància entre dues columnes, és a dir, dues Series, alineades per l'índex de les files

returns["MSFT"].corr(returns["IBM"])  # equivalent returns.MSFT.corr(returns.IBM) perquè són Python attributes
returns["MSFT"].cov(returns["IBM"])

# Si volem la taula relacional de totes
print(returns.corr())

# Si volem la correlació/covariància entre una columna(o fila) del DataFrame i una altra Serie o DataFrame, alineats pels índexs:

returns.corrwith(returns.IBM) # Correlació del DataFrame anterior amb la Serie IBM.

          AAPL       IBM      MSFT      GOOG
AAPL  1.000000  0.436557  0.760095  0.685282
IBM   0.436557  1.000000  0.476538  0.444079
MSFT  0.760095  0.476538  1.000000  0.787131
GOOG  0.685282  0.444079  0.787131  1.000000


AAPL    0.436557
IBM     1.000000
MSFT    0.476538
GOOG    0.444079
dtype: float64