In [89]:
import pandas as pd
import numpy as np
import openpyxl

### Creating and Manipulating DataFrames

In [12]:
# Build a small mixed-dtype frame. The scalar entries (A, B, F) are broadcast
# across the four-row index supplied by the Series in column C.
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp("20130102"),
        C=pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        D=np.full(4, 3, dtype="int32"),
        E=pd.Categorical(["test", "train"] * 2),
        F="foo",
    )
)

In [13]:
# Display the frame: the scalar columns A, B and F repeat the same value on every row.
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [14]:
# Each column keeps its own dtype (float, datetime, category, object, ...).
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [15]:
# Six consecutive calendar days beginning on 1 January 2013.
dates = pd.date_range(start="2013-01-01", periods=6, freq="D")

In [16]:
# Display the DatetimeIndex: six daily timestamps with freq='D'.
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [17]:
# Draw the standard-normal samples from a seeded generator so the frame (and
# every output derived from it) is identical on each Restart & Run All.
# One row per entry in `dates`, four columns labelled A-D.
rng = np.random.default_rng(seed=0)
df = pd.DataFrame(
    rng.standard_normal((len(dates), 4)),
    index=dates,
    columns=list("ABCD"),
)


In [18]:
# Display the frame: rows are labelled by date, columns by the letters A-D.
df

Unnamed: 0,A,B,C,D
2013-01-01,1.020579,0.681374,1.517496,-0.568167
2013-01-02,1.931734,0.306294,0.77448,0.574765
2013-01-03,1.371006,0.432753,-0.197505,0.49923
2013-01-04,0.096502,-1.550923,0.109702,0.960045
2013-01-05,-0.711432,0.830442,0.417824,-0.131395
2013-01-06,-0.365043,-0.631765,0.750336,2.422611


In [19]:
# Row labels: the DatetimeIndex that was passed as `index=` when df was built.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [20]:
# Column labels: the four single-letter names 'A' through 'D'.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [22]:
# The underlying values as a plain NumPy array; index and column labels are not included.
df.to_numpy()

array([[ 1.02057948,  0.68137404,  1.51749601, -0.56816736],
       [ 1.9317342 ,  0.30629395,  0.77447959,  0.57476536],
       [ 1.37100557,  0.43275328, -0.19750514,  0.49922963],
       [ 0.09650153, -1.55092313,  0.10970222,  0.96004549],
       [-0.71143155,  0.8304425 ,  0.41782402, -0.13139465],
       [-0.36504292, -0.63176467,  0.7503356 ,  2.42261067]])

In [25]:
# All four columns of df share a single dtype (float64).
df.dtypes

A    float64
B    float64
C    float64
D    float64
dtype: object

In [26]:
# Same call as earlier in the notebook; df2 holds a different dtype per column,
# in contrast to the uniformly float64 df.
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [27]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.557224,0.011363,0.562055,0.626182
std,1.042933,0.920729,0.599299,1.035159
min,-0.711432,-1.550923,-0.197505,-0.568167
25%,-0.249657,-0.39725,0.186733,0.026261
50%,0.558541,0.369524,0.58408,0.536997
75%,1.283399,0.619219,0.768444,0.863725
max,1.931734,0.830442,1.517496,2.422611


In [28]:
# Per-brand columns for the car-sales table; every list has six entries.
Cardata = dict(
    Mercedes=[2, 4, 0, 4, 0, 3],
    Ford=[3, 0, 0, 1, 6, 12],
    Tata=[9, 3, 4, 1, 0, 0],
    Renault=[12, 1, 0, 0, 3, 1],
)

In [29]:
# Turn the dict of equal-length lists into a frame: one column per key, default 0..n-1 row labels.
Carsales = pd.DataFrame(data=Cardata)

In [30]:
# Display the sales table: six rows, one column per brand.
Carsales

Unnamed: 0,Mercedes,Ford,Tata,Renault
0,2,3,9,12
1,4,0,3,1
2,0,0,4,0
3,4,1,1,0
4,0,6,0,3
5,3,12,0,1


In [35]:
# A single extra record labelled 4. Carsales already has a row labelled 4, so the
# concatenated frame ends up with that label twice; the brands this record
# omits (Tata, Renault) are filled with NaN, which turns those columns into floats.
newsales = pd.DataFrame(
    {"Mercedes": [0], "Ford": [1]},
    index=[4],
)
Carsales = pd.concat([Carsales, newsales])

In [36]:
# Display the result: note the repeated row label 4 and the NaN entries in Tata and Renault.
Carsales

Unnamed: 0,Mercedes,Ford,Tata,Renault
0,2,3,9.0,12.0
1,4,0,3.0,1.0
2,0,0,4.0,0.0
3,4,1,1.0,0.0
4,0,6,0.0,3.0
5,3,12,0.0,1.0
4,0,1,,


In [43]:
# Label-based selection: every row (:) of the 'Mercedes' column, returned as a Series.
Carsales.loc[:,'Mercedes']

0    2
1    4
2    0
3    4
4    0
5    3
4    0
Name: Mercedes, dtype: int64

In [64]:
# One random integer in [1, 10) per existing row. Sizing from len(Carsales)
# keeps the new column aligned with the frame instead of relying on a literal 7,
# and the seeded generator makes the values reproducible across runs.
rng = np.random.default_rng(seed=1)
Jaguar = rng.integers(1, 10, size=len(Carsales))
# insert() mutates Carsales in place. With allow_duplicates=False it raises
# ValueError if a 'Jaguar' column already exists (e.g. when the cell is re-run).
Carsales.insert(loc=4, column='Jaguar', value=Jaguar, allow_duplicates=False)

<h3>Dropping / deleting columns</h3>

In [80]:
# NOTE(review): this cell contains only commented-out code. No 'Tesla' column is
# created in the visible cells, so un-commenting it as-is would raise KeyError.
# Either drop a column that exists or remove the cell. TODO confirm intent.
# Carsales.drop(columns=['Tesla'],inplace=True)

In [78]:
# Display the table again, now including the inserted 'Jaguar' column.
Carsales

Unnamed: 0,Mercedes,Ford,Tata,Jaguar,Renault
0,2,3,9.0,6,12.0
1,4,0,3.0,7,1.0
2,0,0,4.0,2,0.0
3,4,1,1.0,4,0.0
4,0,6,0.0,3,3.0
5,3,12,0.0,5,1.0
4,0,1,,4,


In [71]:
# Seven uniform samples in [0, 1) from a seeded generator, so reruns reproduce them.
rng = np.random.default_rng(seed=2)
Mazda = rng.random(7)
# Seven evenly spaced values from 1 to 100 inclusive. linspace's fourth
# positional slot is `endpoint` (a boolean), not a step or a precision, so no
# extra argument is passed; including the stop value is already the default.
Tata = np.linspace(1, 100, num=7)
# Pair the two arrays row by row: one (Mazda, Tata) tuple per record.
new_sales = list(zip(Mazda, Tata))

In [72]:
# Each tuple in new_sales becomes one row; the two positions are named Mazda and Tata.
Carsales2 = pd.DataFrame(
    data=new_sales,
    columns=["Mazda", "Tata"],
)

In [73]:
# Display the two-column frame built from the (Mazda, Tata) tuples.
Carsales2

Unnamed: 0,Mazda,Tata
0,0.410944,1.0
1,0.726588,17.5
2,0.15837,34.0
3,0.922379,50.5
4,0.29378,67.0
5,0.818009,83.5
6,0.54281,100.0


In [91]:
# Stack the two frames vertically. Columns are unioned (cells missing on either
# side become NaN) and ignore_index=True renumbers the rows from 0.
Carsales3 = pd.concat(
    objs=[Carsales, Carsales2],
    axis=0,
    ignore_index=True,
)

In [92]:
# Display the combined frame: rows from Carsales2 carry values only in Tata and Mazda.
Carsales3

Unnamed: 0,Mercedes,Ford,Tata,Jaguar,Renault,Mazda
0,2.0,3.0,9.0,6.0,12.0,
1,4.0,0.0,3.0,7.0,1.0,
2,0.0,0.0,4.0,2.0,0.0,
3,4.0,1.0,1.0,4.0,0.0,
4,0.0,6.0,0.0,3.0,3.0,
5,3.0,12.0,0.0,5.0,1.0,
6,0.0,1.0,,4.0,,
7,,,1.0,,,0.410944
8,,,17.5,,,0.726588
9,,,34.0,,,0.15837


In [94]:
# Persist the combined table to the working directory in both formats.
# With the default settings the row index is written as the first column.
Carsales3.to_csv(path_or_buf='Carsales.csv')
# Writing .xlsx needs an Excel engine such as openpyxl (imported at the top of the notebook).
Carsales3.to_excel(excel_writer='Carsales.xlsx')

## Deleting a DataFrame

In [97]:
# Remove the name `Carsales` from the namespace; any later reference to it raises
# NameError. `del` is a statement, so it takes no call-style parentheses.
del Carsales

### Delete a row based on an index

In [99]:
# drop() returns a new frame and leaves Carsales3 itself untouched.
# Label 20 does not appear among the row labels produced by the concat that
# built Carsales3, and a missing label raises KeyError by default;
# errors="ignore" skips labels that are absent instead of failing.
Carsales3.drop(index=[20], errors="ignore")

Unnamed: 0,Mercedes,Ford,Tata,Jaguar,Renault,Mazda
0,2.0,3.0,9.0,6.0,12.0,
1,4.0,0.0,3.0,7.0,1.0,
2,0.0,0.0,4.0,2.0,0.0,
3,4.0,1.0,1.0,4.0,0.0,
4,0.0,6.0,0.0,3.0,3.0,
5,3.0,12.0,0.0,5.0,1.0,
6,0.0,1.0,,4.0,,
7,,,1.0,,,0.410944
8,,,17.5,,,0.726588
9,,,34.0,,,0.15837
