In [1]:
# Python language version: report the interpreter running this notebook.

from platform import python_version
print('Versão de Python Neste Jupyter Notebook:', python_version())

Versão de Python Neste Jupyter Notebook: 3.10.5


In [2]:
import warnings
# Silences every warning for the rest of the session. NOTE(review): this also
# hides any deprecation warnings pandas emits in later cells.
warnings.filterwarnings('ignore')

### Pandas Series

In [3]:
# We will start by creating an empty Pandas Series.
# The dtype is given explicitly: an empty Series created without one is
# float64 on pandas 1.x (with a deprecation warning) but object on pandas 2.x.
import pandas as pd

s = pd.Series(dtype='float64')
print(s)

Series([], dtype: float64)


In [4]:
# A Pandas Series holding the integers 1 through 7, printed with its index:

s = pd.Series(list(range(1, 8)))
print(s)

0    1
1    2
2    3
3    4
4    5
5    6
6    7
dtype: int64


In [None]:
# A Pandas Series whose values are single characters:
import pandas as pd

s = pd.Series(list('abcde'))
print(s)

In [5]:
# Let’s create a random Pandas Series of float numbers:
import numpy as np

s = pd.Series(np.random.randn(5))
print(s)

0    0.940483
1    0.294006
2    1.486152
3    0.471261
4    1.374364
dtype: float64


In [6]:
s.index

RangeIndex(start=0, stop=5, step=1)

In [7]:
s = pd.Series(np.random.randn(5),
              index=['a', 'b', 
                     'c', 'd', 
                     'e'])

In [8]:
# A Pandas Series built from a dictionary: the keys become the index labels.
import pandas as pd

dictionary = dict(zip('abcde', range(1, 6)))

s = pd.Series(dictionary)
print(s)

a    1
b    2
c    3
d    4
e    5
dtype: int64


#### Simple operations with Pandas Series

In [9]:
# Two Series with the same values: s1 is labelled 'a'..'e' (built from a
# dictionary), s2 gets the default integer index 0..4 (built from a list).
array = list(range(1, 6))
dictionary = dict(zip('abcde', array))

s1 = pd.Series(dictionary)
s2 = pd.Series(array)

In [10]:
# Selecting one item from the Pandas Series by position.
# s1 is indexed by string labels, so .iloc is used for positional access:
# plain s1[0] relies on an integer-as-position fallback that is deprecated
# in pandas 2.x.
s1.iloc[0] # Select the first element

1

In [11]:
s1['a']

1

In [12]:
s2[0]

1

In [13]:
# Selecting several items from the Pandas Series by position
# (.iloc, because s1 is labelled with strings rather than integers):
s1.iloc[[1, 4]]

b    2
e    5
dtype: int64

In [14]:
s1[['b', 'e']]

b    2
e    5
dtype: int64

In [15]:
s2[[1, 4]]

1    2
4    5
dtype: int64

In [16]:
# Get the series starting from an element:
s1[2:]

c    3
d    4
e    5
dtype: int64

In [17]:
s2[2:]

2    3
3    4
4    5
dtype: int64

In [18]:
# Get the series up to one element:
s1[:2]

a    1
b    2
dtype: int64

In [19]:
s2[:2]

0    1
1    2
dtype: int64

In [20]:
# Assign a value:
s1.iloc[1] = 99  # by position (second element, label 'b')
s1['a'] = 99     # by label

In [21]:
s2[1] = 99
print(s2)

0     1
1    99
2     3
3     4
4     5
dtype: int64


In [22]:
# Get a value by index label (like a dictionary key).
# `s` here is the Series built from the dictionary in an earlier cell, not s1
# (whose 'b' entry was overwritten with 99 above).
s.get('b')

2

In [23]:
# Add, subtract, multiply, divide, power, and almost any NumPy function that accepts NumPy arrays.
print(s1 + 2)
print(s1 - 2)
print(s1 * 2)
print(s1 / 2)
print(s1 ** 2)

print(np.exp(s1))

a    101
b    101
c      5
d      6
e      7
dtype: int64
a    97
b    97
c     1
d     2
e     3
dtype: int64
a    198
b    198
c      6
d      8
e     10
dtype: int64
a    49.5
b    49.5
c     1.5
d     2.0
e     2.5
dtype: float64
a    9801
b    9801
c       9
d      16
e      25
dtype: int64
a    9.889030e+42
b    9.889030e+42
c    2.008554e+01
d    5.459815e+01
e    1.484132e+02
dtype: float64


In [24]:
# The same operations work between two Pandas Series. Values are matched by
# index label; when the indexes differ, the result is indexed by the union of
# both and every label missing from either operand becomes NaN.
s1 + s1 # The indices are aligned

a    198
b    198
c      6
d      8
e     10
dtype: int64

In [25]:
s1 + s2 # The indices are unaligned

a   NaN
b   NaN
c   NaN
d   NaN
e   NaN
0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
dtype: float64

### Pandas DataFrame

In [26]:
# Creating an empty DataFrame:
s = pd.DataFrame()
print(s)

Empty DataFrame
Columns: []
Index: []


In [27]:
s = pd.DataFrame(columns=['A', 'B', 
                          'C', 'D', 
                          'E'])
print(s)

Empty DataFrame
Columns: [A, B, C, D, E]
Index: []


In [28]:
# Empty frame with five named columns and row labels 1..5; every cell is NaN.
s = pd.DataFrame(index=range(1, 6),
                 columns=list('ABCDE'))
print(s)

     A    B    C    D    E
1  NaN  NaN  NaN  NaN  NaN
2  NaN  NaN  NaN  NaN  NaN
3  NaN  NaN  NaN  NaN  NaN
4  NaN  NaN  NaN  NaN  NaN
5  NaN  NaN  NaN  NaN  NaN


In [29]:
# Creating a DataFrame from a dictionary of lists (despite its name, `array`
# is a plain dict): each key becomes a column, each list its values.
array = {'A' : [1, 2, 3, 4], 
         'B' : [4, 3, 2, 1]}

pd.DataFrame(array)

Unnamed: 0,A,B
0,1,4
1,2,3
2,3,2
3,4,1


In [30]:
# Creating a DataFrame from a dictionary of lists, with a datetime index:
# one timestamp per row, supplied through a DatetimeIndex.
array = {'A':[1, 2, 3, 4], 'B':[4, 3, 2, 1]}
index = pd.DatetimeIndex(['2018-12-01',
                          '2018-12-02',
                          '2018-12-03',
                          '2018-12-04'])

pd.DataFrame(array, index=index)

Unnamed: 0,A,B
2018-12-01,1,4
2018-12-02,2,3
2018-12-03,3,2
2018-12-04,4,1


In [31]:
# Creating a DataFrame from a dictionary. The dictionary is wrapped in a list,
# so it becomes a single row whose columns are the keys:
dictionary = {'a':1, 'b':2, 'c':3, 'd':4, 'e': 5}
pd.DataFrame([dictionary])

Unnamed: 0,a,b,c,d,e
0,1,2,3,4,5


In [32]:
# Viewing a DataFrame: several methods let us explore a Pandas DataFrame.
# First, we create a DataFrame of random values to work with.

pd.DataFrame({'A':np.random.randn(10),
              'B':np.random.randn(10),
              'C':np.random.randn(10)})

Unnamed: 0,A,B,C
0,0.002054,-0.254679,-0.757662
1,-0.700608,-0.325884,-0.512507
2,0.072353,-1.439029,0.449625
3,0.305115,0.341107,-2.271271
4,0.654014,-0.866327,-0.538438
5,0.126354,-1.211312,-0.508802
6,1.755083,-0.054438,2.291753
7,-1.502832,0.402771,-0.846099
8,-0.698518,0.722153,0.157411
9,0.249007,0.103209,-0.465787


In [33]:
# Get the first three rows:

df=pd.DataFrame({'A':np.random.randn(10),
                 'B':np.random.randn(10),
                 'C':np.random.randn(10)})
df.head(3)

Unnamed: 0,A,B,C
0,0.204665,-0.655608,-1.224582
1,-0.810945,-1.161863,0.453662
2,1.635004,0.601725,-0.101655


In [34]:
# Get the last three rows:

df=pd.DataFrame({'A':np.random.randn(10),
                 'B':np.random.randn(10),
                 'C':np.random.randn(10)})

df.tail(3)

Unnamed: 0,A,B,C
7,-1.019024,-0.005709,0.419746
8,0.557314,0.842465,1.021277
9,-0.461156,0.622682,-3.323452


In [35]:
# Get the DataFrame’s index:
df=pd.DataFrame({'A':np.random.randn(10),
                 'B':np.random.randn(10),
                 'C':np.random.randn(10)})

df.index

RangeIndex(start=0, stop=10, step=1)

In [36]:
# Get the DataFrame’s columns:

df=pd.DataFrame({'A':np.random.randn(10),
                 'B':np.random.randn(10),
                 'C':np.random.randn(10)})

df.columns

Index(['A', 'B', 'C'], dtype='object')

In [37]:
# Get the DataFrame’s values:

df=pd.DataFrame({'A':np.random.randn(10),
                 'B':np.random.randn(10),
                 'C':np.random.randn(10)})

df.values

array([[-2.54746315,  1.75166309, -0.81485184],
       [-1.04734489,  1.87459596, -1.07595442],
       [ 0.85066175,  0.19625345, -0.1015041 ],
       [ 0.45166526, -0.22416585,  0.23836026],
       [ 0.33703799,  0.14218774,  2.1917028 ],
       [-1.23933002,  0.89858802, -0.98873068],
       [ 0.52066439, -1.69386465, -0.71078183],
       [ 1.04407077,  1.87608011, -0.20044849],
       [ 0.5895178 ,  2.27855286, -0.008104  ],
       [ 2.27963169,  0.81657263,  0.30488982]])

### Importing data in Pandas

#### Importing data from CSV file

In [None]:
df=pd.read_csv('Filename.csv')
type(df)

#### Customizing pandas import

In [None]:
df=pd.read_csv('Filename.csv', sep=' ')

In [None]:
df=pd.read_csv('Filename.csv', 
               usecols=[0, 1, 2, 3, 4, 5],
               nrows=100)

#### Importing data from Excel files

In [None]:
df=pd.read_excel('Filename.xls', sheet_name='Sheet1')

### Indexing and Subsetting

In [None]:
# Load the first five columns of the MSFT price file.
df=pd.read_csv('MSFT.csv',
               usecols=[0, 1, 2, 3, 4])
# Only the last expression of a cell is displayed, so the shape is printed
# explicitly and the preview is left as the final expression.
print(df.shape)
df.head()

#### Selecting a single column

In [None]:
close=df['Close']
close.head()

#### Selecting multiple columns

In [None]:
closevol=df[['Close', 'Volume']]
closevol.head()

#### Selecting rows via []

In [None]:
df=pd.read_csv('TSLA.csv' )
df[100:110]

In [None]:
df[100:110][['Close', 'Volume']]

#### Selecting via `.loc[]` (By label)

In [None]:
# .loc slices by index label and includes both endpoints, so this selects the
# rows labelled 100 through 110.
df.loc[100:110]

In [None]:
df.loc[100:110, ['Close', 'Volume']]

#### Selecting via `.iloc[]` (By position)

In [None]:
# .iloc slices by integer position with the usual half-open rule, so this
# selects positions 100 through 109.
df.iloc[100:110]

In [None]:
# Rows and columns are both picked by position here.
# NOTE(review): confirm that positions 3 and 4 are the intended columns of
# TSLA.csv; the label-based cells above select 'Close' and 'Volume'.
df.iloc[100:110, [3, 4]]

#### Boolean indexing

In [None]:
df[df.Close > 110]

In [None]:
# Keep rows whose Close lies strictly between 110 and 120. Both bounds must
# hold, hence `&`; joining them with `|` would match every non-missing row.
df[(df['Close'] > 110) & (df['Close'] < 120)]

### Manipulating a DataFrame

#### Transpose using `.T`

In [None]:
df=pd.read_csv('TSLA.csv' )
df2=df[100:110][['Close', 'Volume']]
df2.T

#### The `.sort_index()` method

In [None]:
df.sort_index()

#### The `.sort_values()` method

In [None]:
df.sort_values(by='Close')

In [None]:
df.sort_values(by=['Open', 'Close'])

#### The `.reindex()` function

In [None]:
df=pd.DataFrame(np.random.randn(5),
                index=['a', 'b', 
                       'c', 'd', 
                       'e'])

df

In [None]:
df.reindex(['b', 'a', 'd', 'c', 'e'])

In [None]:
df = pd.DataFrame(np.random.randn(5))
df.reindex([4,3,2,1,0])

#### Adding a new column

In [None]:
df = pd.DataFrame(np.random.randn(5))

In [None]:
df['new']=1
df

#### Delete an existing column

In [None]:
df = pd.DataFrame(np.random.randn(5, 4))
df

In [None]:
del df[0]

In [None]:
df['new']=1
df

In [None]:
del df['new']

#### The `.at[]` (By label)

In [None]:
df=pd.DataFrame(np.random.randn(5,4),
                index=['a', 'b', 
                       'c', 'd', 
                       'e'],
                
                columns=['A', 'B', 
                         'C', 'D'])

print(df)

In [None]:
df.at['a', 'A']

In [None]:
df.at['a', 'A'] = 0

#### The `.iat[]` (By position)

In [None]:
df=pd.DataFrame(np.random.randn(5, 4),
                index=['a', 'b', 
                       'c', 'd', 
                       'e'],
                
                columns=['A', 'B', 
                         'C', 'D'])

print(df)
df.iat[0, 0]

In [None]:
df.iat[0, 0] = 0

#### Conditional updating of values

In [None]:
df=pd.DataFrame(np.random.randn(5, 4),
                index=['a','b',
                       'c','d',
                       'e'],
                
                columns=['A', 'B', 
                         'C', 'D'])

print(df)

In [None]:
df[df > 0] = 1
df

In [None]:
# Replace the negative entries of column 'A' with 1. A single .loc call
# selects rows and column together, so the assignment is applied to df itself
# rather than to a possibly temporary object, as happens with chained indexing.
df.loc[df['A'] < 0, 'A'] = 1
print(df)

In [None]:
# Set 'A' to 9 on the rows where both 'B' and 'C' are negative. .loc is used
# instead of chained indexing so the write is guaranteed to reach df.
df.loc[(df['B'] < 0) & (df['C'] < 0), 'A'] = 9
print(df)

#### The `.dropna()` method

In [None]:
df=pd.DataFrame(np.random.randn(5, 4),
                index=['a','b',
                       'c','d',
                       'e'],
                
                columns=['A', 'B', 
                         'C', 'D'])

print(df)

In [None]:
# Blank out 'A' (set it to NaN) on the rows where both 'B' and 'C' are
# negative. .loc is used instead of chained indexing so the write is
# guaranteed to reach df.
df.loc[(df['B'] < 0) & (df['C'] < 0), 'A'] = np.nan
print(df)

In [None]:
# Drop every row that contains at least one NaN. The result is stored under a
# new name so that df keeps its NaN values for the column-wise example that
# follows (overwriting df here would leave nothing for it to drop).
df_rows = df.dropna()
print(df_rows)

In [None]:
df=df.dropna(axis=1)
print(df)

#### The `.fillna()` method

In [None]:
df=pd.DataFrame(np.random.randn(5, 4),
                index=['a','b',
                       'c','d',
                       'e'],
                
                columns=['A', 'B', 
                         'C', 'D'])

print(df)

In [None]:
# Blank out 'A' (set it to NaN) on the rows where both 'B' and 'C' are
# negative. .loc is used instead of chained indexing so the write is
# guaranteed to reach df.
df.loc[(df['B'] < 0) & (df['C'] < 0), 'A'] = np.nan
print(df)

In [None]:
df=df.fillna(999)
print(df)

#### The `.apply()` method

In [38]:
df=pd.DataFrame(np.random.randn(5, 4),
                index=['a','b',
                       'c','d',
                       'e'],
                
                columns=['A', 'B', 
                         'C', 'D'])

print(df)

          A         B         C         D
a -1.884675  0.353800  1.593408 -1.913058
b  1.066666  0.074515 -0.600665  1.402123
c  2.495086 -1.531376  0.253160 -1.479275
d -2.058112  1.510316 -1.103926 -0.610046
e -0.402666 -1.500314  0.818845  1.938761


In [39]:
def square_number(number):
    """Return ``number`` raised to the power of two.

    Accepts anything that supports exponentiation, e.g. plain numbers or the
    pandas objects handed over by ``DataFrame.apply``.
    """
    return pow(number, 2)

# Test the function
square_number(2)

4

In [40]:
df.apply(square_number, axis=1)

Unnamed: 0,A,B,C,D
a,3.551998,0.125174,2.538949,3.65979
b,1.137776,0.005552,0.360798,1.96595
c,6.225455,2.345113,0.06409,2.188255
d,4.235826,2.281056,1.218653,0.372157
e,0.16214,2.250942,0.670506,3.758796


#### The `.shift()` function

In [41]:
df=pd.DataFrame(np.random.randn(5, 4),
                index=['a','b',
                       'c','d',
                       'e'],
                
                columns=['A', 'B', 
                         'C', 'D'])

print(df)

          A         B         C         D
a -0.605708 -0.812815  1.211774 -0.746435
b  0.671458 -0.525986  0.212611 -0.361595
c -1.106150  0.446077  0.104036 -1.963488
d -0.204762 -0.277867  0.340787  1.205583
e  1.218917 -0.293192 -0.744204  0.418005


In [42]:
df['D'].shift(1)

a         NaN
b   -0.746435
c   -0.361595
d   -1.963488
e    1.205583
Name: D, dtype: float64

In [43]:
df['shift'] = df['D'].shift(-1)

### Statistical Exploratory data analysis

#### The `info()` function

In [44]:
df=pd.DataFrame(np.random.randn(5, 4),
                index=['a','b',
                       'c','d',
                       'e'],
                
                columns=['A', 'B', 
                         'C', 'D'])

print(df)

          A         B         C         D
a -0.838198  1.459958  0.346550 -1.039013
b  0.090031 -1.241810  0.923656 -0.129919
c -2.628749 -1.058693 -2.058679 -0.655140
d -1.032958  2.159265  0.376074 -0.546729
e -1.840865 -0.442724  0.661350 -0.452106


In [45]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, a to e
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A       5 non-null      float64
 1   B       5 non-null      float64
 2   C       5 non-null      float64
 3   D       5 non-null      float64
dtypes: float64(4)
memory usage: 200.0+ bytes


#### The `describe()` function

In [46]:
df=pd.DataFrame(np.random.randn(5, 4),
                index=['a','b',
                       'c','d',
                       'e'],
                
                columns=['A', 'B', 
                         'C', 'D'])

print(df)

          A         B         C         D
a  1.078450 -0.433161  1.348125 -0.508667
b -0.100164  1.117361  0.047917 -0.448532
c -0.867536 -1.111496  0.732259 -0.278198
d -1.077542  0.854424  0.369244 -0.898241
e  2.235978  0.449882  1.071601 -1.747421


In [47]:
df.describe()

Unnamed: 0,A,B,C,D
count,5.0,5.0,5.0,5.0
mean,0.253837,0.175402,0.713829,-0.776212
std,1.393219,0.928729,0.522697,0.588454
min,-1.077542,-1.111496,0.047917,-1.747421
25%,-0.867536,-0.433161,0.369244,-0.898241
50%,-0.100164,0.449882,0.732259,-0.508667
75%,1.07845,0.854424,1.071601,-0.448532
max,2.235978,1.117361,1.348125,-0.278198


#### The `value_counts()` function

In [48]:
df['A'].value_counts()

 1.078450    1
-0.100164    1
-0.867536    1
-1.077542    1
 2.235978    1
Name: A, dtype: int64

#### The `mean()` function

In [49]:
df['A'].mean() # Specifying a column

0.25383732121475705

In [50]:
# Only the last expression of a cell is displayed, so the output shown comes
# from the second call; both calls average each column.
df.mean() # By column (axis left at its default)
df.mean(axis=0) # By column (axis given explicitly)

A    0.253837
B    0.175402
C    0.713829
D   -0.776212
dtype: float64

In [51]:
df.mean(axis=1) # By row

a    0.371187
b    0.154146
c   -0.381243
d   -0.188029
e    0.502510
dtype: float64

#### The `std()` function

In [52]:
df['A'].std() # Specifying a column

1.393218658672347

In [53]:
# Only the last expression of a cell is displayed, so the output shown comes
# from the second call; both calls compute the per-column standard deviation.
df.std() # By column (axis left at its default)
df.std(axis=0) # By column (axis given explicitly)

A    1.393219
B    0.928729
C    0.522697
D    0.588454
dtype: float64

In [54]:
df.std(axis=1) # By row

a    0.979072
b    0.675022
c    0.820621
d    0.947434
e    1.672696
dtype: float64

### Filtering Pandas DataFrame

In [55]:
df=pd.DataFrame(np.random.randn(5, 4),
                index=['a','b',
                       'c','d',
                       'e'],
                
                columns=['A', 'B', 
                         'C', 'D'])

print(df)

          A         B         C         D
a  0.447562  0.478645 -1.475394  2.758552
b -0.723083  1.840985 -0.461110  0.701272
c -1.281894  0.711404 -1.826642  0.336229
d  0.643546  0.690277  0.470318  1.420400
e  0.483270  0.582635 -0.189454 -0.440360


In [56]:
df_filtered = df[df['A'] > 0]
print(df_filtered)

          A         B         C         D
a  0.447562  0.478645 -1.475394  2.758552
d  0.643546  0.690277  0.470318  1.420400
e  0.483270  0.582635 -0.189454 -0.440360


In [57]:
df_filtered = df[(df['A'] > 0) & (df['B'] > 0)]
print(df_filtered)

          A         B         C         D
a  0.447562  0.478645 -1.475394  2.758552
d  0.643546  0.690277  0.470318  1.420400
e  0.483270  0.582635 -0.189454 -0.440360


### Iterating Pandas DataFrame

In [58]:
# iterrows() yields one (index label, row Series) pair per row; each pair is
# printed as a tuple.
for label, row in df.iterrows():
    print((label, row))

('a', A    0.447562
B    0.478645
C   -1.475394
D    2.758552
Name: a, dtype: float64)
('b', A   -0.723083
B    1.840985
C   -0.461110
D    0.701272
Name: b, dtype: float64)
('c', A   -1.281894
B    0.711404
C   -1.826642
D    0.336229
Name: c, dtype: float64)
('d', A    0.643546
B    0.690277
C    0.470318
D    1.420400
Name: d, dtype: float64)
('e', A    0.483270
B    0.582635
C   -0.189454
D   -0.440360
Name: e, dtype: float64)


### Merge, Append and Concat Pandas DataFrame

In [59]:
# merge function allows us to merge two DataFrame by rows:

df=pd.DataFrame(np.random.randn(5, 4),
                index=['a','b',
                       'c','d',
                       'e'],
                
                columns=['A', 'B', 
                         'C', 'D'])

print(df)

          A         B         C         D
a  0.137969  0.651765  0.685665  0.264536
b -0.915699  0.377587  1.937440 -0.743264
c  0.592601  0.482650  0.842395  0.344094
d -0.523573  0.939528  2.786991  0.726991
e  1.250112 -1.928294  0.548047  0.343058


In [60]:
df=pd.DataFrame(np.random.randn(5, 4),
                index=['a','b',
                       'c','d',
                       'e'],
                
                columns=['A', 'B', 
                         'C', 'D'])

print(df)

          A         B         C         D
a  0.108331 -0.769100  2.122379  0.639116
b  0.855281 -0.676938  0.142872 -0.279182
c  0.174174 -1.401819 -0.548616 -0.354862
d -0.303070  2.008958 -0.568588 -0.040662
e -2.599993  0.684187 -1.285895 -0.775482


In [61]:
# Create two separately named frames: the cells above both bind their frame
# to `df`, so `df1` and `df2` have to be defined before they can be merged.
df1 = pd.DataFrame(np.random.randn(5, 4),
                   index=['a', 'b', 'c', 'd', 'e'],
                   columns=['A', 'B', 'C', 'D'])
df2 = pd.DataFrame(np.random.randn(5, 4),
                   index=['a', 'b', 'c', 'd', 'e'],
                   columns=['A', 'B', 'C', 'D'])

# Join on the shared row labels. Without explicit keys, merge would join on
# the common columns A-D, and two frames of random floats have no equal rows,
# so the result would be empty. The suffixes keep the identically named
# columns of the two inputs apart.
df3 = pd.merge(df1, df2,
               left_index=True, right_index=True,
               suffixes=('_1', '_2'))
print(df3)

In [63]:
# append function allows us to append rows from one DataFrame to another DataFrame by rows:

df1=pd.DataFrame(np.random.randn(5, 4),
                 index=['a','b',
                        'c','d',
                        'e'],
                 
                 columns=['A', 'B', 
                          'C', 'D'])

In [64]:
df2=pd.DataFrame(np.random.randn(5, 4),
                 index=['a','b',
                        'c','d',
                        'e'],
                 
                 columns=['A', 'B', 
                          'C', 'D'])

In [65]:
# Stack the rows of df2 underneath those of df1. DataFrame.append was
# deprecated in pandas 1.4 and removed in 2.0; pd.concat is its replacement
# and yields the same row-wise result.
df3 = pd.concat([df1, df2])
print(df3)

          A         B         C         D
a -0.862360  0.228184 -1.171874 -0.060418
b -1.304653 -0.325308 -1.242309 -0.359591
c -0.119642  1.498019  0.767861  0.433473
d -0.347885 -0.533698  0.183767 -0.248562
e  0.031573  0.168213  1.378398 -0.099482
a  1.193269  1.263233 -1.493122 -0.565172
b -1.234440  0.504393 -0.867899  0.419651
c  0.569653 -0.160118 -0.357771  0.690350
d  2.081159 -1.070583  0.405211 -1.507889
e  0.156255 -0.097638 -0.435444  0.008405


In [66]:
# The concat function combines two DataFrames either by rows or by columns.
# First operand for the examples: a 5x4 frame of random values.
df1=pd.DataFrame(np.random.randn(5, 4),
                 index=['a','b',
                        'c','d',
                        'e'],
                 
                 columns=['A', 'B', 
                          'C', 'D'])

In [67]:
df2=pd.DataFrame(np.random.randn(5, 4),
                 index=['a','b',
                        'c','d',
                        'e'],
                 
                 columns=['A', 'B', 
                          'C', 'D'])

In [68]:
df3 = pd.concat([df1, df2]) # Concat by row
print(df3)

          A         B         C         D
a -1.095002 -1.285036  0.865067 -1.282341
b -0.053873  0.790710 -2.300837  0.113218
c  0.958043 -1.249775  0.737968 -0.381875
d -0.528264  1.342630 -0.397967  0.523161
e -0.153421 -0.383485 -0.450719  1.353938
a -0.447698 -0.978017  2.494840  0.816261
b  1.318323 -0.776176 -0.509879 -0.075572
c -0.733236 -1.117210  1.688941  0.676138
d -0.177001 -0.976813 -0.072677 -0.261476
e  0.609452  0.947908 -0.782458 -0.052650


In [69]:
df3 = pd.concat([df1, df2], axis=0) # Concat by row
print(df3)

          A         B         C         D
a -1.095002 -1.285036  0.865067 -1.282341
b -0.053873  0.790710 -2.300837  0.113218
c  0.958043 -1.249775  0.737968 -0.381875
d -0.528264  1.342630 -0.397967  0.523161
e -0.153421 -0.383485 -0.450719  1.353938
a -0.447698 -0.978017  2.494840  0.816261
b  1.318323 -0.776176 -0.509879 -0.075572
c -0.733236 -1.117210  1.688941  0.676138
d -0.177001 -0.976813 -0.072677 -0.261476
e  0.609452  0.947908 -0.782458 -0.052650


In [70]:
# Concat by column
df3 = pd.concat([df1, df2], axis=1)
print(df3)

          A         B         C         D         A         B         C  \
a -1.095002 -1.285036  0.865067 -1.282341 -0.447698 -0.978017  2.494840   
b -0.053873  0.790710 -2.300837  0.113218  1.318323 -0.776176 -0.509879   
c  0.958043 -1.249775  0.737968 -0.381875 -0.733236 -1.117210  1.688941   
d -0.528264  1.342630 -0.397967  0.523161 -0.177001 -0.976813 -0.072677   
e -0.153421 -0.383485 -0.450719  1.353938  0.609452  0.947908 -0.782458   

          D  
a  0.816261  
b -0.075572  
c  0.676138  
d -0.261476  
e -0.052650  


### TimeSeries in Pandas

#### Indexing Pandas TimeSeries

In [71]:
rng = pd.date_range('12/1/2018', 
                    periods=30,
                    freq='H')

print(rng)

DatetimeIndex(['2018-12-01 00:00:00', '2018-12-01 01:00:00',
               '2018-12-01 02:00:00', '2018-12-01 03:00:00',
               '2018-12-01 04:00:00', '2018-12-01 05:00:00',
               '2018-12-01 06:00:00', '2018-12-01 07:00:00',
               '2018-12-01 08:00:00', '2018-12-01 09:00:00',
               '2018-12-01 10:00:00', '2018-12-01 11:00:00',
               '2018-12-01 12:00:00', '2018-12-01 13:00:00',
               '2018-12-01 14:00:00', '2018-12-01 15:00:00',
               '2018-12-01 16:00:00', '2018-12-01 17:00:00',
               '2018-12-01 18:00:00', '2018-12-01 19:00:00',
               '2018-12-01 20:00:00', '2018-12-01 21:00:00',
               '2018-12-01 22:00:00', '2018-12-01 23:00:00',
               '2018-12-02 00:00:00', '2018-12-02 01:00:00',
               '2018-12-02 02:00:00', '2018-12-02 03:00:00',
               '2018-12-02 04:00:00', '2018-12-02 05:00:00'],
              dtype='datetime64[ns]', freq='H')


In [72]:
rng = pd.date_range('12/1/2018', 
                    periods=10, 
                    freq='D')

print(rng)

DatetimeIndex(['2018-12-01', '2018-12-02', '2018-12-03', '2018-12-04',
               '2018-12-05', '2018-12-06', '2018-12-07', '2018-12-08',
               '2018-12-09', '2018-12-10'],
              dtype='datetime64[ns]', freq='D')


In [73]:
ts = pd.DataFrame(np.random.randn(len(rng), 4),
                  index=rng, 
                  columns=['A', 'B', 
                           'C', 'D'])

print(ts)

                   A         B         C         D
2018-12-01 -1.021051  0.424820 -0.109607 -0.300068
2018-12-02 -0.114836  1.344437  1.825487  1.402166
2018-12-03 -0.147683  1.319934 -0.641185 -0.044270
2018-12-04 -1.924182 -1.205500 -1.588057  0.951452
2018-12-05  1.649269  1.489803 -0.830438 -0.597075
2018-12-06 -0.727015  1.110855  1.220718 -0.682758
2018-12-07  1.078288  2.104230 -0.486928  0.705603
2018-12-08  1.618043  0.757367  1.169392  0.279253
2018-12-09  1.266129  0.124606 -0.923902  0.581257
2018-12-10  1.800050 -0.454531 -2.368826 -0.689348


In [74]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)
print(ts)

2018-12-01    0.878306
2018-12-02    0.741791
2018-12-03   -0.098107
2018-12-04    1.164733
2018-12-05   -0.456292
2018-12-06    0.038628
2018-12-07   -0.384605
2018-12-08   -1.559226
2018-12-09    0.067127
2018-12-10    0.634041
Freq: D, dtype: float64


In [75]:
df=pd.read_csv('TSLA.csv')
df.tail()

FileNotFoundError: [Errno 2] No such file or directory: 'TSLA.csv'

In [None]:
df = df.set_index('Date')
df.tail()

#### Resampling Pandas TimeSeries

In [76]:
rng = pd.date_range('12/1/2018', 
                    periods=30,
                    freq='H')

ts = pd.DataFrame(np.random.randn(len(rng), 4),
                  index=rng, 
                  columns=['A', 'B', 
                           'C', 'D'])

print(ts)

                            A         B         C         D
2018-12-01 00:00:00 -0.442396 -0.066619 -0.063901  0.032863
2018-12-01 01:00:00 -0.558400 -0.273722 -0.107967  0.763360
2018-12-01 02:00:00  1.028090  0.280019 -0.798736  0.181104
2018-12-01 03:00:00 -1.242946  0.032348  0.069839  1.525887
2018-12-01 04:00:00 -0.412756 -0.204439 -0.221620 -0.741189
2018-12-01 05:00:00 -0.514586  0.368851  0.052441  0.347456
2018-12-01 06:00:00  0.530974 -0.608947  1.161020  0.287120
2018-12-01 07:00:00 -1.067197  0.576996 -0.293537  0.859931
2018-12-01 08:00:00 -0.578944 -0.632184 -0.732454  1.832056
2018-12-01 09:00:00  0.529112 -1.000621 -0.362744  0.049242
2018-12-01 10:00:00  0.339562 -0.616037  0.333667  0.401394
2018-12-01 11:00:00  1.338392  1.616990 -0.887077 -0.454264
2018-12-01 12:00:00  0.305210  0.915916 -0.941981  0.022747
2018-12-01 13:00:00  0.364189  0.855969 -0.823870 -0.873395
2018-12-01 14:00:00  0.574875  0.213437  0.455713  0.485294
2018-12-01 15:00:00  0.374774 -0.812003 

In [77]:
ts = ts.resample("1D").mean()
print(ts)

                   A         B         C         D
2018-12-01  0.020918  0.092713 -0.154141  0.143370
2018-12-02 -0.351461 -0.042502  0.227032 -0.393155


#### Manipulating TimeSeries

In [78]:
rng = pd.date_range('12/1/2018', 
                    periods=30,
                    freq='D')

ts = pd.DataFrame(np.random.randn(len(rng), 4),
                  index=rng, 
                  columns=['A', 'B', 
                           'C', 'D'])

print(ts)

                   A         B         C         D
2018-12-01  0.832450 -0.846065 -0.110172  0.135266
2018-12-02 -1.291002 -0.345322 -1.549687  0.853274
2018-12-03 -0.084574 -0.852135  0.842000 -1.102338
2018-12-04  0.772899  0.708556  0.855254 -0.966423
2018-12-05 -0.096976 -0.742447  0.775705 -0.865556
2018-12-06 -0.023150 -0.172216 -0.327256  0.788510
2018-12-07  0.431581 -0.178212  2.095419 -0.609247
2018-12-08  0.140947 -0.433207 -0.579218 -1.176862
2018-12-09  0.716685 -0.431275 -0.863208  1.214851
2018-12-10 -0.466859 -0.120417  0.423257 -1.240802
2018-12-11  0.257617  0.842578  0.707846 -0.814820
2018-12-12  0.515019 -1.097894 -0.328590 -1.138961
2018-12-13 -1.333810 -0.254673 -0.339453 -0.374795
2018-12-14  0.515163 -0.484293  1.419675  1.854725
2018-12-15 -0.612159 -0.574398 -0.427715  0.291296
2018-12-16 -0.415863 -0.590664  1.266523 -0.785543
2018-12-17 -0.036432 -1.930961 -0.734225 -0.340181
2018-12-18  0.075176  1.424605  1.172824 -0.925440
2018-12-19  0.655491 -1.514402 

In [79]:
# Getting all values from a specific date:
ts['2018-12-15':]

Unnamed: 0,A,B,C,D
2018-12-15,-0.612159,-0.574398,-0.427715,0.291296
2018-12-16,-0.415863,-0.590664,1.266523,-0.785543
2018-12-17,-0.036432,-1.930961,-0.734225,-0.340181
2018-12-18,0.075176,1.424605,1.172824,-0.92544
2018-12-19,0.655491,-1.514402,-0.600422,0.3345
2018-12-20,0.322478,-1.39219,0.254774,-1.47433
2018-12-21,-0.68819,-1.723721,-0.91494,-1.09446
2018-12-22,-0.672328,-0.492349,-1.420736,0.649224
2018-12-23,-0.333547,0.556593,0.594727,0.050134
2018-12-24,-1.807889,-0.166885,0.084225,1.161196


In [80]:
# Getting all values inside a date range:
ts['2018-12-15':'2018-12-20']

Unnamed: 0,A,B,C,D
2018-12-15,-0.612159,-0.574398,-0.427715,0.291296
2018-12-16,-0.415863,-0.590664,1.266523,-0.785543
2018-12-17,-0.036432,-1.930961,-0.734225,-0.340181
2018-12-18,0.075176,1.424605,1.172824,-0.92544
2018-12-19,0.655491,-1.514402,-0.600422,0.3345
2018-12-20,0.322478,-1.39219,0.254774,-1.47433


In [81]:
%reload_ext watermark
%watermark -a "Caique Miranda" -gu "caiquemiranda" -iv

Author: Caique Miranda

Github username: caiquemiranda

numpy : 1.23.0
pandas: 1.4.3



### End.