In [56]:
import pandas as pd
# Load the housing CSV (comma is read_csv's default separator), then work on a
# copy so the frame as loaded stays available in `melb_data`.
melb_data = pd.read_csv('data/melb_data_ps.csv')
melb_df = melb_data.copy()

In [57]:
# Inspect the Date column before any conversion.
display(melb_df.loc[:, 'Date'])

0         3/12/2016
1         4/02/2016
2         4/03/2017
3         4/03/2017
4         4/06/2016
            ...    
13575    26/08/2017
13576    26/08/2017
13577    26/08/2017
13578    26/08/2017
13579    26/08/2017
Name: Date, Length: 13580, dtype: object

In [58]:
# Convert the Date column to pandas datetime values; dayfirst=True makes the
# parser read strings such as 3/12/2016 as day/month/year.
parsed_dates = pd.to_datetime(melb_df['Date'], dayfirst=True)
melb_df['Date'] = parsed_dates
display(melb_df['Date'])

0       2016-12-03
1       2016-02-04
2       2017-03-04
3       2017-03-04
4       2016-06-04
           ...    
13575   2017-08-26
13576   2017-08-26
13577   2017-08-26
13578   2017-08-26
13579   2017-08-26
Name: Date, Length: 13580, dtype: datetime64[ns]

In [59]:
# Extract the sale year from the date and summarise it.
years_sold = melb_df['Date'].dt.year
print(years_sold)

# Label -> statistic, printed in the order min, max, mode.
year_stats = {
    'Min year sold:': years_sold.min(),
    'Max year sold:': years_sold.max(),
    'Mode year sold:': years_sold.mode()[0],  # first entry of the mode Series
}
for stat_label, stat_value in year_stats.items():
    print(stat_label, stat_value)

0        2016
1        2016
2        2017
3        2017
4        2016
         ... 
13575    2017
13576    2017
13577    2017
13578    2017
13579    2017
Name: Date, Length: 13580, dtype: int64
Min year sold: 2016
Max year sold: 2017
Mode year sold: 2017


In [60]:
# Extract the sale month from the date, store it in a new column, and show
# the share of sales that fell in each month.
month_of_sale = melb_df['Date'].dt.month
melb_df['MonthSale'] = month_of_sale
melb_df['MonthSale'].value_counts(normalize=True)

5     0.149411
7     0.145950
9     0.135862
6     0.134757
8     0.114138
11    0.082032
4     0.069882
3     0.049926
12    0.044698
10    0.040574
2     0.032622
1     0.000147
Name: MonthSale, dtype: float64

In [61]:
# Time elapsed between the reference date and each sale date.
reference_date = pd.to_datetime('2016-01-01')
delta_days = melb_df['Date'] - reference_date
display(delta_days)

0       337 days
1        34 days
2       428 days
3       428 days
4       155 days
          ...   
13575   603 days
13576   603 days
13577   603 days
13578   603 days
13579   603 days
Name: Date, Length: 13580, dtype: timedelta64[ns]

In [62]:
# Show the same intervals as plain day counts rather than timedeltas.
elapsed_days = delta_days.dt.days
display(elapsed_days)

0        337
1         34
2        428
3        428
4        155
        ... 
13575    603
13576    603
13577    603
13578    603
13579    603
Name: Date, Length: 13580, dtype: int64

In [63]:
# Age of the building at the time of sale: sale year minus construction year.
sale_year = melb_df['Date'].dt.year
build_year = melb_df['YearBuilt']
melb_df['AgeBuilding'] = sale_year - build_year
display(melb_df['AgeBuilding'])

0         46
1        116
2        117
3         47
4          2
        ... 
13575     36
13576     22
13577     20
13578     97
13579     97
Name: AgeBuilding, Length: 13580, dtype: int64

In [64]:
# Remove the construction-year column from the table.
melb_df = melb_df.drop(columns=['YearBuilt'])

In [65]:
# Record the day of week of every sale, then count how many sales happened on
# Saturday (5) or Sunday (6).
melb_df['WeekdaySale'] = melb_df['Date'].dt.dayofweek
is_weekend_sale = melb_df['WeekdaySale'].isin([5, 6])
per1 = melb_df.loc[is_weekend_sale, 'WeekdaySale']
display(len(per1))

12822

In [66]:
def get_weekend(weekday):
    """Flag whether a day-of-week number is a weekend day.

    Args:
        weekday: Day-of-week number; 5 and 6 count as the weekend.

    Returns:
        1 when ``weekday`` equals 5 or 6, otherwise 0.
    """
    return 1 if weekday in (5, 6) else 0

In [67]:
# Derive a 0/1 weekend flag from the day-of-week column, one value per row.
weekend_flag = melb_df['WeekdaySale'].map(get_weekend)
melb_df['Weekend'] = weekend_flag
melb_df.head()

Unnamed: 0,index,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,...,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount,Coordinates,MonthSale,AgeBuilding,WeekdaySale,Weekend
0,0,Abbotsford,85 Turner St,2,h,1480000.0,S,Biggin,2016-12-03,2.5,...,Yarra,-37.7996,144.9984,Northern Metropolitan,4019,"-37.7996, 144.9984",12,46,5,1
1,1,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,2016-02-04,2.5,...,Yarra,-37.8079,144.9934,Northern Metropolitan,4019,"-37.8079, 144.9934",2,116,3,0
2,2,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,2017-03-04,2.5,...,Yarra,-37.8093,144.9944,Northern Metropolitan,4019,"-37.8093, 144.9944",3,117,5,1
3,3,Abbotsford,40 Federation La,3,h,850000.0,PI,Biggin,2017-03-04,2.5,...,Yarra,-37.7969,144.9969,Northern Metropolitan,4019,"-37.7969, 144.9969",3,47,5,1
4,4,Abbotsford,55a Park St,4,h,1600000.0,VB,Nelson,2016-06-04,2.5,...,Yarra,-37.8072,144.9941,Northern Metropolitan,4019,"-37.8072, 144.9941",6,2,5,1


In [68]:
# Mean price of the properties sold on a weekend, rounded to a whole number.
sold_on_weekend = melb_df['Weekend'] == 1
pr_weekend = melb_df.loc[sold_on_weekend, 'Price']
round(pr_weekend.mean())

1081199