In [None]:
#==================================
# Creating, Reading and Writing

In [1]:
import pandas as pd

In [2]:
#A DataFrame is a table. It contains an array of individual entries, each of which has a certain value

In [7]:
#Create a simple Datframe
pd.DataFrame({'Price': [250000], 'Bedrooms': [3]})

Unnamed: 0,Price,Bedrooms
0,250000,3


In [8]:
pd.DataFrame({'Price': [250000, 120000], 'Bedrooms': [3, 1]})


Unnamed: 0,Price,Bedrooms
0,250000,3
1,120000,1


In [9]:
pd.DataFrame({'Price': [250000, 120000], 'Bedrooms': [3, 1]}, index=['House 1', 'House 2'])


Unnamed: 0,Price,Bedrooms
House 1,250000,3
House 2,120000,1


In [10]:
# A Series, by contrast, is a sequence of data values. If a DataFrame is a table, a Series is a list.
# A Series is, in essence, a single column of a DataFrame
# a Series does not have a column name, it only has one overall name

In [11]:
pd.Series([1,2,3,4,5])

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [12]:
pd.Series([200000, 233000, 250000], index=['2018', '2019', '2020'], name = 'House Prices')

2018    200000
2019    233000
2020    250000
Name: House Prices, dtype: int64

In [13]:
# It's helpful to think of a DataFrame as actually being just a bunch of Series "glued together". 

In [17]:
melb_data = pd.read_csv('../../../Datasets/kaggle/melb_data.csv')

In [18]:
melb_data.shape

(13580, 21)

In [19]:
melb_data.head()

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
0,Mulgrave,35 Bevis St,3,h,9000000,PI,Hall,29/07/2017,18.8,3170,...,1,1.0,744,117.0,1960.0,Monash,-37.93168,145.16126,South-Eastern Metropolitan,7113
1,Canterbury,49 Mangarra Rd,5,h,8000000,VB,Sotheby's,13/05/2017,9.0,3126,...,5,4.0,2079,464.3,1880.0,Boroondara,-37.8179,145.0694,Southern Metropolitan,3265
2,Hawthorn,49 Lisson Gr,4,h,7650000,S,Abercromby's,17/06/2017,5.3,3122,...,2,4.0,1690,284.0,1863.0,Boroondara,-37.82652,145.03052,Southern Metropolitan,11308
3,Kew,15 Barry St,6,h,6500000,S,Jellis,13/08/2016,5.6,3101,...,6,3.0,1334,365.0,1890.0,Boroondara,-37.8029,145.0267,Southern Metropolitan,10331
4,Middle Park,136 Page St,5,h,6400000,S,Marshall,09/09/2017,3.0,3206,...,2,1.0,553,308.0,1920.0,,-37.84908,144.95753,Southern Metropolitan,2019


In [20]:
house_write_test = pd.DataFrame({'Price': [250000, 120000], 'Bedrooms': [3, 1]}, index=['House 1', 'House 2'])

In [21]:
house_write_test.to_csv('write_test.csv')

In [22]:
#================================
#Indexing, Selecting & Assigning

In [24]:
#Native Python accessor objects provide good ways of indexing data. Pandas carries all of these over


In [27]:
melb_data.Price

0        9000000
1        8000000
2        7650000
3        6500000
4        6400000
          ...   
13575     160000
13576     145000
13577     145000
13578     131000
13579      85000
Name: Price, Length: 13580, dtype: int64

In [28]:
melb_data['Price']

0        9000000
1        8000000
2        7650000
3        6500000
4        6400000
          ...   
13575     160000
13576     145000
13577     145000
13578     131000
13579      85000
Name: Price, Length: 13580, dtype: int64

In [29]:
melb_data['Price'][0]

9000000

In [30]:
#pandas has its own accessor operators, loc and iloc. For more advanced operations, these are the ones you're supposed to be using

In [71]:
#select first value in Suburb
melb_data.Suburb.iloc[0]

'Mulgrave'

In [31]:
#retrieve a row
melb_data.iloc[0]

Suburb                             Mulgrave
Address                         35 Bevis St
Rooms                                     3
Type                                      h
Price                               9000000
Method                                   PI
SellerG                                Hall
Date                             29/07/2017
Distance                               18.8
Postcode                               3170
Bedroom2                                  3
Bathroom                                  1
Car                                     1.0
Landsize                                744
BuildingArea                          117.0
YearBuilt                            1960.0
CouncilArea                          Monash
Lattitude                         -37.93168
Longtitude                        145.16126
Regionname       South-Eastern Metropolitan
Propertycount                          7113
Name: 0, dtype: object

In [None]:
# Both loc and iloc are row-first, column-second. This is the opposite of what we do in native Python, which is column-first, row-second.

In [32]:
#retrieve a column
melb_data.iloc[:, 0]

0           Mulgrave
1         Canterbury
2           Hawthorn
3                Kew
4        Middle Park
            ...     
13575       Hawthorn
13576         Coburg
13577         Albion
13578      Caulfield
13579      Footscray
Name: Suburb, Length: 13580, dtype: object

In [33]:
# On its own, the : operator, which also comes from native Python, means "everything"

In [76]:
melb_data.iloc[:3, 0]

0      Mulgrave
1    Canterbury
2      Hawthorn
Name: Suburb, dtype: object

In [35]:
melb_data.iloc[1:3, 0]


1    Canterbury
2      Hawthorn
Name: Suburb, dtype: object

In [36]:
melb_data.iloc[[0,1,2], 0]


0      Mulgrave
1    Canterbury
2      Hawthorn
Name: Suburb, dtype: object

In [37]:
melb_data.iloc[-5:]

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
13575,Hawthorn,17/17 Park St,1,u,160000,VB,HAR,08/04/2017,4.6,3122,...,1,0.0,322,,2009.0,Boroondara,-37.8198,145.0373,Southern Metropolitan,11308
13576,Coburg,171 Moreland Rd,4,h,145000,PI,Jellis,04/06/2016,7.8,3058,...,1,1.0,536,164.0,1910.0,Moreland,-37.7555,144.9658,Northern Metropolitan,11204
13577,Albion,8/6 Ridley St,1,u,145000,PI,Biggin,28/05/2016,13.9,3020,...,1,1.0,36,,,Brimbank,-37.7833,144.8266,Western Metropolitan,2185
13578,Caulfield,30 Pyne St,4,h,131000,PI,Rodney,25/02/2017,8.9,3162,...,1,2.0,499,155.0,1920.0,Glen Eira,-37.8864,145.0242,Southern Metropolitan,2379
13579,Footscray,202/51 Gordon St,1,u,85000,PI,Burnham,03/09/2016,6.4,3011,...,1,0.0,0,,2007.0,Maribyrnong,-37.7911,144.89,Western Metropolitan,7570


In [38]:
# Label-based selection
# The second paradigm for attribute selection is the one followed by the loc operator: label-based selection. In this paradigm, it's the data index value, not its position, which matters.

In [39]:
melb_data.loc[0, 'Suburb']

'Mulgrave'

In [40]:
# iloc is conceptually simpler than loc because it ignores the dataset's indices. When we use iloc we treat the dataset like a big matrix (a list of lists), one that we have to index into by position. loc, by contrast, uses the information in the indices to do its work. Since your dataset usually has meaningful indices, it's usually easier to do things using loc instead. For example, here's one operation that's much easier using loc:

In [99]:
pd.DataFrame(melb_data.loc[0:2:, ['Rooms', 'Type', 'Price' ]])

Unnamed: 0,Rooms,Type,Price
0,3,sold,9000000
1,5,sold,8000000
2,4,sold,7650000


In [104]:
melb_data.loc[:, ['Rooms', 'Type', 'Price' ]]

Unnamed: 0,Rooms,Type,Price
0,3,sold,9000000
1,5,sold,8000000
2,4,sold,7650000
3,6,sold,6500000
4,5,sold,6400000
...,...,...,...
13575,1,sold,160000
13576,4,sold,145000
13577,1,sold,145000
13578,4,sold,131000


In [103]:
melb_data.loc[[1,52,2673], ['Rooms', 'Type', 'Price' ]]

Unnamed: 0,Rooms,Type,Price
1,5,sold,8000000
52,4,sold,4050000
2673,2,sold,1460000


In [None]:
# cols = ['country', 'province', 'region_1', 'region_2']
# df = pd.DataFrame(reviews.loc[[0,1,10,100],cols])

In [44]:
# When choosing or transitioning between loc and iloc, there is one "gotcha" worth keeping in mind, which is that the two methods use slightly different indexing schemes.

# iloc uses the Python stdlib indexing scheme, where the first element of the range is included and the last one excluded. So 0:10 will select entries 0,...,9. loc, meanwhile, indexes inclusively. So 0:10 will select entries 0,...,10

In [45]:
# Manipulating the index

In [48]:
melb_data.head()

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
0,Mulgrave,35 Bevis St,3,h,9000000,PI,Hall,29/07/2017,18.8,3170,...,1,1.0,744,117.0,1960.0,Monash,-37.93168,145.16126,South-Eastern Metropolitan,7113
1,Canterbury,49 Mangarra Rd,5,h,8000000,VB,Sotheby's,13/05/2017,9.0,3126,...,5,4.0,2079,464.3,1880.0,Boroondara,-37.8179,145.0694,Southern Metropolitan,3265
2,Hawthorn,49 Lisson Gr,4,h,7650000,S,Abercromby's,17/06/2017,5.3,3122,...,2,4.0,1690,284.0,1863.0,Boroondara,-37.82652,145.03052,Southern Metropolitan,11308
3,Kew,15 Barry St,6,h,6500000,S,Jellis,13/08/2016,5.6,3101,...,6,3.0,1334,365.0,1890.0,Boroondara,-37.8029,145.0267,Southern Metropolitan,10331
4,Middle Park,136 Page St,5,h,6400000,S,Marshall,09/09/2017,3.0,3206,...,2,1.0,553,308.0,1920.0,,-37.84908,144.95753,Southern Metropolitan,2019


In [52]:
# This is useful if you can come up with an index for the dataset which is better than the current one
melb_data.set_index('Index')

KeyError: "None of ['Index'] are in the columns"

In [53]:
#----------------------
# Conditional selection

In [55]:
melb_data.Suburb == 'Hawthorn'

0        False
1        False
2         True
3        False
4        False
         ...  
13575     True
13576    False
13577    False
13578    False
13579    False
Name: Suburb, Length: 13580, dtype: bool

In [56]:
melb_data.loc[melb_data.Suburb == 'Hawthorn']

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
2,Hawthorn,49 Lisson Gr,4,h,7650000,S,Abercromby's,17/06/2017,5.3,3122,...,2,4.0,1690,284.0,1863.0,Boroondara,-37.82652,145.03052,Southern Metropolitan,11308
10,Hawthorn,17 Fairview St,5,h,5510000,S,RT,03/06/2017,5.3,3122,...,2,5.0,820,300.0,1971.0,Boroondara,-37.83031,145.02973,Southern Metropolitan,11308
15,Hawthorn,49 Berkeley St,3,h,5100000,S,Jellis,03/12/2016,4.6,3122,...,2,3.0,1038,,,Boroondara,-37.83320,145.03660,Southern Metropolitan,11308
18,Hawthorn,50 Kinkora Rd,4,h,5050000,VB,Kay,19/11/2016,4.6,3122,...,4,2.0,905,373.0,1913.0,Boroondara,-37.81680,145.03210,Southern Metropolitan,11308
41,Hawthorn,2 Fordholm Rd,5,h,4250000,PI,Marshall,17/09/2016,4.6,3122,...,4,3.0,1275,435.0,1930.0,Boroondara,-37.82910,145.03080,Southern Metropolitan,11308
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13440,Hawthorn,4/162 Barkers Rd,1,h,300000,VB,Ray,13/05/2017,4.6,3122,...,1,1.0,0,,,Boroondara,-37.81430,145.03190,Southern Metropolitan,11308
13456,Hawthorn,11/140 Riversdale Rd,1,u,292000,S,Woodards,07/11/2016,4.6,3122,...,1,0.0,0,56.0,1966.0,Boroondara,-37.82940,145.03820,Southern Metropolitan,11308
13466,Hawthorn,13/506 Glenferrie Rd,1,u,290000,S,Biggin,22/05/2016,4.6,3122,...,1,1.0,1568,,,Boroondara,-37.83080,145.03400,Southern Metropolitan,11308
13504,Hawthorn,12/5 Summerlea Gr,1,u,275000,SP,hockingstuart,13/05/2017,4.6,3122,...,1,0.0,479,30.0,1970.0,Boroondara,-37.82920,145.03220,Southern Metropolitan,11308


In [57]:
melb_data.loc[(melb_data.Suburb == 'Hawthorn') & (melb_data.Price > 500000)]

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
2,Hawthorn,49 Lisson Gr,4,h,7650000,S,Abercromby's,17/06/2017,5.3,3122,...,2,4.0,1690,284.0,1863.0,Boroondara,-37.82652,145.03052,Southern Metropolitan,11308
10,Hawthorn,17 Fairview St,5,h,5510000,S,RT,03/06/2017,5.3,3122,...,2,5.0,820,300.0,1971.0,Boroondara,-37.83031,145.02973,Southern Metropolitan,11308
15,Hawthorn,49 Berkeley St,3,h,5100000,S,Jellis,03/12/2016,4.6,3122,...,2,3.0,1038,,,Boroondara,-37.83320,145.03660,Southern Metropolitan,11308
18,Hawthorn,50 Kinkora Rd,4,h,5050000,VB,Kay,19/11/2016,4.6,3122,...,4,2.0,905,373.0,1913.0,Boroondara,-37.81680,145.03210,Southern Metropolitan,11308
41,Hawthorn,2 Fordholm Rd,5,h,4250000,PI,Marshall,17/09/2016,4.6,3122,...,4,3.0,1275,435.0,1930.0,Boroondara,-37.82910,145.03080,Southern Metropolitan,11308
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11709,Hawthorn,13/177 Power St,2,u,525000,S,Biggin,07/11/2016,4.6,3122,...,1,1.0,0,,,Boroondara,-37.82650,145.02570,Southern Metropolitan,11308
11717,Hawthorn,14/181 Power St,2,u,525000,PI,Walshe,17/06/2017,5.3,3122,...,1,1.0,0,,,Boroondara,-37.82700,145.02563,Southern Metropolitan,11308
11721,Hawthorn,3/199 Auburn Rd,2,u,525000,SP,Marshall,27/05/2017,5.3,3122,...,1,1.0,279,,,Boroondara,-37.82676,145.04444,Southern Metropolitan,11308
11756,Hawthorn,3/18 Connell St,2,u,520000,PI,CASTRAN,13/08/2016,4.6,3122,...,1,1.0,0,,,Boroondara,-37.81850,145.02350,Southern Metropolitan,11308


In [58]:
melb_data.loc[(melb_data.Suburb == 'Hawthorn') | (melb_data.Price > 500000)]

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
0,Mulgrave,35 Bevis St,3,h,9000000,PI,Hall,29/07/2017,18.8,3170,...,1,1.0,744,117.0,1960.0,Monash,-37.93168,145.16126,South-Eastern Metropolitan,7113
1,Canterbury,49 Mangarra Rd,5,h,8000000,VB,Sotheby's,13/05/2017,9.0,3126,...,5,4.0,2079,464.3,1880.0,Boroondara,-37.81790,145.06940,Southern Metropolitan,3265
2,Hawthorn,49 Lisson Gr,4,h,7650000,S,Abercromby's,17/06/2017,5.3,3122,...,2,4.0,1690,284.0,1863.0,Boroondara,-37.82652,145.03052,Southern Metropolitan,11308
3,Kew,15 Barry St,6,h,6500000,S,Jellis,13/08/2016,5.6,3101,...,6,3.0,1334,365.0,1890.0,Boroondara,-37.80290,145.02670,Southern Metropolitan,10331
4,Middle Park,136 Page St,5,h,6400000,S,Marshall,09/09/2017,3.0,3206,...,2,1.0,553,308.0,1920.0,,-37.84908,144.95753,Southern Metropolitan,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13440,Hawthorn,4/162 Barkers Rd,1,h,300000,VB,Ray,13/05/2017,4.6,3122,...,1,1.0,0,,,Boroondara,-37.81430,145.03190,Southern Metropolitan,11308
13456,Hawthorn,11/140 Riversdale Rd,1,u,292000,S,Woodards,07/11/2016,4.6,3122,...,1,0.0,0,56.0,1966.0,Boroondara,-37.82940,145.03820,Southern Metropolitan,11308
13466,Hawthorn,13/506 Glenferrie Rd,1,u,290000,S,Biggin,22/05/2016,4.6,3122,...,1,1.0,1568,,,Boroondara,-37.83080,145.03400,Southern Metropolitan,11308
13504,Hawthorn,12/5 Summerlea Gr,1,u,275000,SP,hockingstuart,13/05/2017,4.6,3122,...,1,0.0,479,30.0,1970.0,Boroondara,-37.82920,145.03220,Southern Metropolitan,11308


In [59]:
#isin is lets you select data whose value "is in" a list of values
melb_data.loc[melb_data.Suburb.isin(['Hawthorn', 'Armadale'])]

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
2,Hawthorn,49 Lisson Gr,4,h,7650000,S,Abercromby's,17/06/2017,5.3,3122,...,2,4.0,1690,284.0,1863.0,Boroondara,-37.82652,145.03052,Southern Metropolitan,11308
9,Armadale,367 Dandenong Rd,6,h,5525000,S,Marshall,17/09/2016,6.3,3143,...,3,4.0,1491,516.0,1935.0,Stonnington,-37.86020,145.01300,Southern Metropolitan,4836
10,Hawthorn,17 Fairview St,5,h,5510000,S,RT,03/06/2017,5.3,3122,...,2,5.0,820,300.0,1971.0,Boroondara,-37.83031,145.02973,Southern Metropolitan,11308
14,Armadale,52 Adelaide St,5,h,5200000,SP,Marshall,01/07/2017,6.3,3143,...,3,3.0,761,,,Stonnington,-37.85311,145.02847,Southern Metropolitan,4836
15,Hawthorn,49 Berkeley St,3,h,5100000,S,Jellis,03/12/2016,4.6,3122,...,2,3.0,1038,,,Boroondara,-37.83320,145.03660,Southern Metropolitan,11308
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13456,Hawthorn,11/140 Riversdale Rd,1,u,292000,S,Woodards,07/11/2016,4.6,3122,...,1,0.0,0,56.0,1966.0,Boroondara,-37.82940,145.03820,Southern Metropolitan,11308
13466,Hawthorn,13/506 Glenferrie Rd,1,u,290000,S,Biggin,22/05/2016,4.6,3122,...,1,1.0,1568,,,Boroondara,-37.83080,145.03400,Southern Metropolitan,11308
13485,Armadale,15/405 Dandenong Rd,1,u,280000,PI,hockingstuart,10/09/2016,6.3,3143,...,1,1.0,0,38.0,1975.0,Stonnington,-37.86190,145.01850,Southern Metropolitan,4836
13504,Hawthorn,12/5 Summerlea Gr,1,u,275000,SP,hockingstuart,13/05/2017,4.6,3122,...,1,0.0,479,30.0,1970.0,Boroondara,-37.82920,145.03220,Southern Metropolitan,11308


In [63]:
# The second is isnull (and its companion notnull). These methods let you highlight values which are (or are not) empty (NaN).
melb_data.loc[melb_data.Car.isnull()]

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
456,Albert Park,123 Richardson St,4,h,2550000,S,Marshall,19/08/2017,3.0,3206,...,2,,152,175.0,1910.0,,-37.84663,144.95681,Southern Metropolitan,3280
487,Malvern,101 Stanhope St,3,h,2500000,VB,Marshall,23/09/2017,5.9,3144,...,2,,395,149.0,1880.0,,-37.85892,145.03731,Southern Metropolitan,4675
502,Fitzroy,124 Victoria St,3,h,2478000,S,Nelson,19/08/2017,2.1,3065,...,2,,205,106.0,1885.0,,-37.79933,144.97932,Northern Metropolitan,5825
895,Albert Park,59 Moubray St,3,h,2100000,VB,Cayzer,23/09/2017,3.0,3206,...,2,,137,125.0,1890.0,,-37.84388,144.95303,Southern Metropolitan,3280
1097,Albert Park,121 Richardson St,3,h,1960000,S,hockingstuart,16/09/2017,3.0,3206,...,2,,152,161.0,1900.0,,-37.84662,144.95675,Southern Metropolitan,3280
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11496,St Albans,30 Thorndon Dr,3,h,549500,S,YPA,23/09/2017,14.0,3021,...,1,,546,,,,-37.75020,144.80626,Western Metropolitan,14042
11822,North Melbourne,5 Hardwicke St,2,h,516000,SP,Jellis,23/09/2017,1.8,3051,...,1,,1044,79.0,1970.0,,-37.79830,144.94687,Northern Metropolitan,6821
11844,Prahran,19 Porter St,1,u,515000,S,Biggin,16/09/2017,4.6,3181,...,1,,284,,,,-37.85018,144.99006,Southern Metropolitan,7717
12097,Sunbury,39 Mitchells La,3,h,493000,S,Raine,09/09/2017,31.7,3429,...,1,,650,,,,-37.58562,144.71581,Western Metropolitan,14092


In [64]:
melb_data.loc[melb_data.Car.notnull()]

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
0,Mulgrave,35 Bevis St,3,h,9000000,PI,Hall,29/07/2017,18.8,3170,...,1,1.0,744,117.0,1960.0,Monash,-37.93168,145.16126,South-Eastern Metropolitan,7113
1,Canterbury,49 Mangarra Rd,5,h,8000000,VB,Sotheby's,13/05/2017,9.0,3126,...,5,4.0,2079,464.3,1880.0,Boroondara,-37.81790,145.06940,Southern Metropolitan,3265
2,Hawthorn,49 Lisson Gr,4,h,7650000,S,Abercromby's,17/06/2017,5.3,3122,...,2,4.0,1690,284.0,1863.0,Boroondara,-37.82652,145.03052,Southern Metropolitan,11308
3,Kew,15 Barry St,6,h,6500000,S,Jellis,13/08/2016,5.6,3101,...,6,3.0,1334,365.0,1890.0,Boroondara,-37.80290,145.02670,Southern Metropolitan,10331
4,Middle Park,136 Page St,5,h,6400000,S,Marshall,09/09/2017,3.0,3206,...,2,1.0,553,308.0,1920.0,,-37.84908,144.95753,Southern Metropolitan,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13575,Hawthorn,17/17 Park St,1,u,160000,VB,HAR,08/04/2017,4.6,3122,...,1,0.0,322,,2009.0,Boroondara,-37.81980,145.03730,Southern Metropolitan,11308
13576,Coburg,171 Moreland Rd,4,h,145000,PI,Jellis,04/06/2016,7.8,3058,...,1,1.0,536,164.0,1910.0,Moreland,-37.75550,144.96580,Northern Metropolitan,11204
13577,Albion,8/6 Ridley St,1,u,145000,PI,Biggin,28/05/2016,13.9,3020,...,1,1.0,36,,,Brimbank,-37.78330,144.82660,Western Metropolitan,2185
13578,Caulfield,30 Pyne St,4,h,131000,PI,Rodney,25/02/2017,8.9,3162,...,1,2.0,499,155.0,1920.0,Glen Eira,-37.88640,145.02420,Southern Metropolitan,2379


In [65]:
# Assigning data

In [66]:
melb_data['Type'] = 'sold'

In [67]:
melb_data.head()

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
0,Mulgrave,35 Bevis St,3,sold,9000000,PI,Hall,29/07/2017,18.8,3170,...,1,1.0,744,117.0,1960.0,Monash,-37.93168,145.16126,South-Eastern Metropolitan,7113
1,Canterbury,49 Mangarra Rd,5,sold,8000000,VB,Sotheby's,13/05/2017,9.0,3126,...,5,4.0,2079,464.3,1880.0,Boroondara,-37.8179,145.0694,Southern Metropolitan,3265
2,Hawthorn,49 Lisson Gr,4,sold,7650000,S,Abercromby's,17/06/2017,5.3,3122,...,2,4.0,1690,284.0,1863.0,Boroondara,-37.82652,145.03052,Southern Metropolitan,11308
3,Kew,15 Barry St,6,sold,6500000,S,Jellis,13/08/2016,5.6,3101,...,6,3.0,1334,365.0,1890.0,Boroondara,-37.8029,145.0267,Southern Metropolitan,10331
4,Middle Park,136 Page St,5,sold,6400000,S,Marshall,09/09/2017,3.0,3206,...,2,1.0,553,308.0,1920.0,,-37.84908,144.95753,Southern Metropolitan,2019


In [68]:
melb_data['index_backwards'] = range(len(melb_data), 0, -1)

In [69]:
melb_data['index_backwards']

0        13580
1        13579
2        13578
3        13577
4        13576
         ...  
13575        5
13576        4
13577        3
13578        2
13579        1
Name: index_backwards, Length: 13580, dtype: int32

In [70]:
melb_data.head()

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount,index_backwards
0,Mulgrave,35 Bevis St,3,sold,9000000,PI,Hall,29/07/2017,18.8,3170,...,1.0,744,117.0,1960.0,Monash,-37.93168,145.16126,South-Eastern Metropolitan,7113,13580
1,Canterbury,49 Mangarra Rd,5,sold,8000000,VB,Sotheby's,13/05/2017,9.0,3126,...,4.0,2079,464.3,1880.0,Boroondara,-37.8179,145.0694,Southern Metropolitan,3265,13579
2,Hawthorn,49 Lisson Gr,4,sold,7650000,S,Abercromby's,17/06/2017,5.3,3122,...,4.0,1690,284.0,1863.0,Boroondara,-37.82652,145.03052,Southern Metropolitan,11308,13578
3,Kew,15 Barry St,6,sold,6500000,S,Jellis,13/08/2016,5.6,3101,...,3.0,1334,365.0,1890.0,Boroondara,-37.8029,145.0267,Southern Metropolitan,10331,13577
4,Middle Park,136 Page St,5,sold,6400000,S,Marshall,09/09/2017,3.0,3206,...,1.0,553,308.0,1920.0,,-37.84908,144.95753,Southern Metropolitan,2019,13576
