In [4]:
import pandas as pd
import numpy as np

In [7]:
# A Series built from a plain list; the NaN entry makes pandas store every value as float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
print(s)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64


In [9]:
# Six consecutive daily timestamps, starting on 2013-01-01.
dates = pd.date_range(start="20130101", periods=6)
print(dates)

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')


In [62]:
# Build a DataFrame from a dict: every key becomes a column label and every value supplies
# that column's data (the scalar "foo" is repeated for each row).
df = pd.DataFrame(
    data={
        "A": np.array([1.0, 2.0, 3.0, 4.0, 5.0, 5.5]),
        "B": pd.date_range(start="20130101", periods=6),
        "C": pd.Series(1, index=list(range(6)), dtype="float32"),
        "D": np.array([3] * 6, dtype="int32"),
        "E": pd.Categorical(["test", "train"] * 3),
        "F": "foo",
    }
)
print(df)

     A          B    C  D      E    F
0  1.0 2013-01-01  1.0  3   test  foo
1  2.0 2013-01-02  1.0  3  train  foo
2  3.0 2013-01-03  1.0  3   test  foo
3  4.0 2013-01-04  1.0  3  train  foo
4  5.0 2013-01-05  1.0  3   test  foo
5  5.5 2013-01-06  1.0  3  train  foo


In [45]:
import pandas

# Column-oriented input: each key is a column label and each list holds that column's values.
mydataset = {
    "cars": ["BMW", "Volvo", "Ford"],
    "passings": [3, 7, 2],
}

myvar = pandas.DataFrame(data=mydataset)

print(myvar)

    cars  passings
0    BMW         3
1  Volvo         7
2   Ford         2


In [63]:
# Show the dtype of each column of df (one entry per column label).
df.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [64]:
# Viewing data: the first five rows, then the last five rows (5 is the default for both).
print(df.head(), df.tail(), sep="\n")

     A          B    C  D      E    F
0  1.0 2013-01-01  1.0  3   test  foo
1  2.0 2013-01-02  1.0  3  train  foo
2  3.0 2013-01-03  1.0  3   test  foo
3  4.0 2013-01-04  1.0  3  train  foo
4  5.0 2013-01-05  1.0  3   test  foo
     A          B    C  D      E    F
1  2.0 2013-01-02  1.0  3  train  foo
2  3.0 2013-01-03  1.0  3   test  foo
3  4.0 2013-01-04  1.0  3  train  foo
4  5.0 2013-01-05  1.0  3   test  foo
5  5.5 2013-01-06  1.0  3  train  foo


In [65]:
# Show the row labels (the index) of df.
df.index

Index([0, 1, 2, 3, 4, 5], dtype='int64')

In [66]:
# Show the column labels of df.
df.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

In [67]:
# Summary statistics (count, mean, min, quartiles, max, std) for df.
# The output below covers columns A-D only; the categorical "E" and string "F" columns are left out.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6,6.0,6.0
mean,3.416667,2013-01-03 12:00:00,1.0,3.0
min,1.0,2013-01-01 00:00:00,1.0,3.0
25%,2.25,2013-01-02 06:00:00,1.0,3.0
50%,3.5,2013-01-03 12:00:00,1.0,3.0
75%,4.75,2013-01-04 18:00:00,1.0,3.0
max,5.5,2013-01-06 00:00:00,1.0,3.0
std,1.744037,,0.0,0.0


df.T                     # Transposes the DataFrame (swaps rows and columns)
DataFrame.sort_index()   # Sorts by the labels along an axis
DataFrame.sort_values()  # Sorts by the values of one or more columns

In [68]:
# A single column comes back as a Series.
print(df["A"])
# The first three rows, selected explicitly by position.
print(df.iloc[:3])

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    5.5
Name: A, dtype: float64
     A          B    C  D      E    F
0  1.0 2013-01-01  1.0  3   test  foo
1  2.0 2013-01-02  1.0  3  train  foo
2  3.0 2013-01-03  1.0  3   test  foo


In [69]:
df.loc[:, ["A", "B"]]   # Selection by label: all rows (":") and only the columns labelled "A" and "B"

Unnamed: 0,A,B
0,1.0,2013-01-01
1,2.0,2013-01-02
2,3.0,2013-01-03
3,4.0,2013-01-04
4,5.0,2013-01-05
5,5.5,2013-01-06


In [70]:
df.iloc[3]   # Selection by position: the row at integer position 3 (the fourth row), returned as a Series

A                    4.0
B    2013-01-04 00:00:00
C                    1.0
D                      3
E                  train
F                    foo
Name: 3, dtype: object

In [71]:
# Select columns: keep every row, but only the columns labelled "A" and "B".
df_selected_columns = df.loc[:, ["A", "B"]]
print(df_selected_columns)

     A          B
0  1.0 2013-01-01
1  2.0 2013-01-02
2  3.0 2013-01-03
3  4.0 2013-01-04
4  5.0 2013-01-05
5  5.5 2013-01-06


In [72]:
# Select rows: the first three rows by position (equivalent to df.head(3)).
df_selected_rows = df.iloc[:3]
print(df_selected_rows)

     A          B    C  D      E    F
0  1.0 2013-01-01  1.0  3   test  foo
1  2.0 2013-01-02  1.0  3  train  foo
2  3.0 2013-01-03  1.0  3   test  foo


In [76]:
# Filtering: keep only the rows whose "A" value is greater than 2.0 (boolean-mask selection).
df.loc[df["A"] > 2.0]

Unnamed: 0,A,B,C,D,E,F
2,3.0,2013-01-03,1.0,3,test,foo
3,4.0,2013-01-04,1.0,3,train,foo
4,5.0,2013-01-05,1.0,3,test,foo
5,5.5,2013-01-06,1.0,3,train,foo


In [80]:
# Combine two conditions with "&"; each comparison needs its own parentheses because
# "&" binds more tightly than ">" and "==".
df_filtered_multiple = df.loc[
    (df["A"] > 2.0)
    & (df["E"] == 'test')
]
print(df_filtered_multiple)

     A          B    C  D     E    F
2  3.0 2013-01-03  1.0  3  test  foo
4  5.0 2013-01-05  1.0  3  test  foo


In [81]:
# Sort the rows by column "E" in ascending order; df itself is left unchanged.
df_sorted = df.sort_values(by="E", ascending=True)
print(df_sorted)

     A          B    C  D      E    F
0  1.0 2013-01-01  1.0  3   test  foo
2  3.0 2013-01-03  1.0  3   test  foo
4  5.0 2013-01-05  1.0  3   test  foo
1  2.0 2013-01-02  1.0  3  train  foo
3  4.0 2013-01-04  1.0  3  train  foo
5  5.5 2013-01-06  1.0  3  train  foo
