# Loading Dataset by Skipping Unnecessary Data

* skiprows is used to skip specified no of rows from start
* skipfooter is used to skip specified no of rows from bottom
* engine='python' is necessary for using skipfooter
* sep='\s+' is used to separate values by space

In [1]:
import pandas as pd

In [3]:
df = pd.read_csv("../datasets/temperatures.txt", sep='\s+', skiprows=7, skipfooter=11, engine='python')

In [4]:
df.head()

Unnamed: 0,Year,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,J-D,D-N,DJF,MAM,JJA,SON,Year.1
0,1880,-34,-27,-22,-30,-16,-24,-19,-12,-20,-19,-16,-21,-22,***,****,-23,-18,-18,1880
1,1881,-13,-16,-2,-3,-3,-27,-12,-8,-18,-23,-28,-18,-14,-14,-17,-3,-15,-23,1881
2,1882,3,4,-2,-24,-20,-32,-27,-11,-11,-25,-25,-37,-17,-16,-4,-15,-23,-20,1882
3,1883,-38,-38,-12,-20,-20,-8,-3,-13,-19,-19,-28,-21,-20,-21,-38,-18,-8,-22,1883
4,1884,-20,-14,-31,-36,-33,-36,-31,-24,-29,-25,-29,-25,-28,-28,-18,-33,-31,-28,1884


# Loading Dataset from CSV File

In [5]:
df = pd.read_csv("../datasets/admission_predictions.csv")

In [9]:
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admission
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


### Making column as index column instead of default indexes(0 to length(rows)-1)

In [10]:
df = df.set_index("Serial No.",drop=True)

In [11]:
df.head()

Unnamed: 0_level_0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admission
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337,118,4,4.5,4.5,9.65,1,0.92
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.0,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.8
5,314,103,2,2.0,3.0,8.21,0,0.65


# Loading Dataset from Database

In [29]:
import mysql.connector as sql

In [30]:
connection = sql.connect(host='localhost', port=3306, database='ofd_db', user='root', password='rootroot')
query = "SELECT * FROM order_history"

In [31]:
dataset = pd.read_sql(query, connection, index_col='id')

In [32]:
dataset.head()

Unnamed: 0_level_0,order_number,status,price,discount,is_delivery,delivery_address,latitude,longitude,is_price_changed,created_date,updated_date,buyer_id,merchant_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2,55bdf6e5,Completed,800,0,1,Muzaffar Pur Sialkot,12.23,59.23,0,2020-05-03 12:30:50.030768,2020-05-03 12:30:50.030822,1,2
3,30982326,Completed,800,0,1,Muzaffar Pur Sialkot,12.23,59.23,0,2020-05-03 13:27:35.462236,2020-05-03 13:27:35.462292,1,2
4,1ca26f24,Completed,1200,0,1,Muzaffar Pur Sialkot,12.23,59.23,0,2020-05-03 17:37:26.263438,2020-05-03 17:37:26.263505,1,2
5,582a129e,Completed,2200,0,1,"defence road, sialkot",32.4945,74.5229,0,2020-05-03 17:52:35.227804,2020-05-03 17:52:35.227867,1,2
6,56d4014e,Placed,1200,7,1,Muzaffar Pur Sialkot,12.23,59.23,0,2020-05-03 17:54:03.100679,2020-05-03 17:54:03.100749,1,2


# Loading Dataset from Html Page

In [33]:
tables = pd.read_html("http://www.psmsl.org/data/obtaining/")

In [34]:
dataset = tables[0] 

In [36]:
dataset.head()

Unnamed: 0,Station Name,ID,Lat.,Lon.,GLOSS ID,Country,Date,Coastline,Station
0,BREST,1,48.383,-4.495,242.0,FRA,07/08/2019,190,91
1,SWINOUJSCIE,2,53.917,14.233,,POL,19/10/2001,110,92
2,SHEERNESS,3,51.446,0.743,,GBR,06/06/2019,170,101
3,HOLYHEAD,5,53.314,-4.62,,GBR,06/06/2019,170,191
4,CUXHAVEN 2,7,53.867,8.717,284.0,DEU,01/10/2019,140,12


# Loading Dataset from Remote Location

In [39]:
df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/balance-scale/balance-scale.data", sep=",")
df.columns = ['Balance Scale','Left Weight','Left Distance','Right Weight','Right Distance']

In [40]:
df.head()

Unnamed: 0,Balance Scale,Left Weight,Left Distance,Right Weight,Right Distance
0,R,1,1,1,2
1,R,1,1,1,3
2,R,1,1,1,4
3,R,1,1,1,5
4,R,1,1,2,1


# Loading Dataset from Text File

In [43]:
df = pd.read_csv("../Datasets/car_evaluation.txt", sep=",")
df.columns = ['buying','maintenance','doors','persons','lug_boot','safety','class']

In [44]:
df.head()

Unnamed: 0,buying,maintenance,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,med,unacc
1,vhigh,vhigh,2,2,small,high,unacc
2,vhigh,vhigh,2,2,med,low,unacc
3,vhigh,vhigh,2,2,med,med,unacc
4,vhigh,vhigh,2,2,med,high,unacc
