# Data Analysis of Sacramento real estate transactions Dataset using Numpy.
#### Data Source: https://support.spatialkey.com/spatialkey-sample-csv-data/

In [1]:
import numpy as np
import pandas as pd

#### Readable DataFrame using pandas

In [2]:
# Load the raw transactions file into a DataFrame for a readable preview.
df = pd.read_csv('Sacramento_realestate_transactions.csv')

# Preview the first ten records.
df.head(n=10)

Unnamed: 0,street,city,zip,state,beds,baths,sq__ft,type,sale_date,price,latitude,longitude
0,3526 HIGH ST,SACRAMENTO,95838,CA,2,1,836,Residential,Wed May 21 00:00:00 EDT 2008,59222,38.631913,-121.434879
1,51 OMAHA CT,SACRAMENTO,95823,CA,3,1,1167,Residential,Wed May 21 00:00:00 EDT 2008,68212,38.478902,-121.431028
2,2796 BRANCH ST,SACRAMENTO,95815,CA,2,1,796,Residential,Wed May 21 00:00:00 EDT 2008,68880,38.618305,-121.443839
3,2805 JANETTE WAY,SACRAMENTO,95815,CA,2,1,852,Residential,Wed May 21 00:00:00 EDT 2008,69307,38.616835,-121.439146
4,6001 MCMAHON DR,SACRAMENTO,95824,CA,2,1,797,Residential,Wed May 21 00:00:00 EDT 2008,81900,38.51947,-121.435768
5,5828 PEPPERMILL CT,SACRAMENTO,95841,CA,3,1,1122,Condo,Wed May 21 00:00:00 EDT 2008,89921,38.662595,-121.327813
6,6048 OGDEN NASH WAY,SACRAMENTO,95842,CA,3,2,1104,Residential,Wed May 21 00:00:00 EDT 2008,90895,38.681659,-121.351705
7,2561 19TH AVE,SACRAMENTO,95820,CA,3,1,1177,Residential,Wed May 21 00:00:00 EDT 2008,91002,38.535092,-121.481367
8,11150 TRINITY RIVER DR Unit 114,RANCHO CORDOVA,95670,CA,2,2,941,Condo,Wed May 21 00:00:00 EDT 2008,94905,38.621188,-121.270555
9,7325 10TH ST,RIO LINDA,95673,CA,3,2,1146,Residential,Wed May 21 00:00:00 EDT 2008,98937,38.700909,-121.442979


In [3]:
# Preview the last five records (five is also the default for tail()).
df.tail(n=5)

Unnamed: 0,street,city,zip,state,beds,baths,sq__ft,type,sale_date,price,latitude,longitude
980,9169 GARLINGTON CT,SACRAMENTO,95829,CA,4,3,2280,Residential,Thu May 15 00:00:00 EDT 2008,232425,38.457679,-121.35962
981,6932 RUSKUT WAY,SACRAMENTO,95823,CA,3,2,1477,Residential,Thu May 15 00:00:00 EDT 2008,234000,38.499893,-121.45889
982,7933 DAFFODIL WAY,CITRUS HEIGHTS,95610,CA,3,2,1216,Residential,Thu May 15 00:00:00 EDT 2008,235000,38.708824,-121.256803
983,8304 RED FOX WAY,ELK GROVE,95758,CA,4,2,1685,Residential,Thu May 15 00:00:00 EDT 2008,235301,38.417,-121.397424
984,3882 YELLOWSTONE LN,EL DORADO HILLS,95762,CA,3,2,1362,Residential,Thu May 15 00:00:00 EDT 2008,235738,38.655245,-121.075915


#### Import Sacramento_realestate_transactions.csv using the genfromtxt() function, converting it into a 2-dimensional array stored in a variable called sample_array.

In [4]:
# Read the whole CSV as a 2-D array of strings. No header handling is
# requested, so the header line becomes row 0 of the array.
# dtype=str replaces dtype='string': NumPy on Python 3 does not recognise the
# name 'string' and raises TypeError, while the builtin `str` is accepted on
# both Python 2 and Python 3.
sample_array = np.genfromtxt(
    'Sacramento_realestate_transactions.csv',
    delimiter=",",
    dtype=str,
)

In [5]:
# Echo the array; NumPy abbreviates the middle of large arrays with "...".
sample_array

array([['street', 'city', 'zip', ..., 'price', 'latitude', 'longitude'],
       ['3526 HIGH ST', 'SACRAMENTO', '95838', ..., '59222', '38.631913',
        '-121.434879'],
       ['51 OMAHA CT', 'SACRAMENTO', '95823', ..., '68212', '38.478902',
        '-121.431028'],
       ..., 
       ['7933 DAFFODIL WAY', 'CITRUS HEIGHTS', '95610', ..., '235000',
        '38.708824', '-121.256803'],
       ['8304 RED FOX WAY', 'ELK GROVE', '95758', ..., '235301', '38.417',
        '-121.397424'],
       ['3882 YELLOWSTONE LN', 'EL DORADO HILLS', '95762', ..., '235738',
        '38.655245', '-121.075915']],
      dtype='|S35')

#### Show only the header row plus the first 5 data rows, and only the (street, city, zip, state) columns.

In [6]:
# Rows 0-5 (the header line plus five records) and columns 0-3
# (street, city, zip, state).
first_six_rows = sample_array[0:6, 0:4]
first_six_rows

array([['street', 'city', 'zip', 'state'],
       ['3526 HIGH ST', 'SACRAMENTO', '95838', 'CA'],
       ['51 OMAHA CT', 'SACRAMENTO', '95823', 'CA'],
       ['2796 BRANCH ST', 'SACRAMENTO', '95815', 'CA'],
       ['2805 JANETTE WAY', 'SACRAMENTO', '95815', 'CA'],
       ['6001 MCMAHON DR', 'SACRAMENTO', '95824', 'CA']],
      dtype='|S35')

#### Display a boolean array that is True wherever the zip is equal to 95815.

In [7]:
# Element-wise comparison against the zip code; the array holds strings,
# so the comparison value is a string as well.
matching_zip = (first_six_rows == '95815')
matching_zip

array([[False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False,  True, False],
       [False, False,  True, False],
       [False, False, False, False]], dtype=bool)

#### Display the index where zip is equal to 95815 using where( ) method.

In [8]:
# np.where on a boolean condition returns a tuple of index arrays, one per
# axis: here the first array holds the matching rows (3 and 4) and the second
# holds the matching columns (2 and 2).
zip_index = np.where(first_six_rows == '95815')
zip_index

(array([3, 4]), array([2, 2]))

In [21]:
# Similarly, we can find the index of any element in the array, e.g. "SACRAMENTO".

In [22]:
# Locate every cell equal to "SACRAMENTO"; the result is a
# (row indices, column indices) tuple.
city_index = np.where(first_six_rows == 'SACRAMENTO')
city_index

(array([1, 2, 3, 4, 5]), array([1, 1, 1, 1, 1]))

#### Now save 'first_six_rows' array into new file called 'result.csv' using savetxt() method

In [23]:
# Write the slice out as comma-separated text; fmt='%s' writes each cell
# as a plain string.
np.savetxt(
    'result.csv',
    first_six_rows,
    fmt='%s',
    delimiter=',',
)

#### Now read 'result.csv' back with genfromtxt(), assign the array to a variable new_file, and check it.

In [24]:
# Read the file just written back into a 2-D array of strings.
# dtype=str replaces dtype='string': NumPy on Python 3 does not recognise the
# name 'string' and raises TypeError, while the builtin `str` is accepted on
# both Python 2 and Python 3.
new_file = np.genfromtxt('result.csv', delimiter=",", dtype=str)

In [25]:
# Display the array that was read back from result.csv.
new_file

array([['street', 'city', 'zip', 'state'],
       ['3526 HIGH ST', 'SACRAMENTO', '95838', 'CA'],
       ['51 OMAHA CT', 'SACRAMENTO', '95823', 'CA'],
       ['2796 BRANCH ST', 'SACRAMENTO', '95815', 'CA'],
       ['2805 JANETTE WAY', 'SACRAMENTO', '95815', 'CA'],
       ['6001 MCMAHON DR', 'SACRAMENTO', '95824', 'CA']],
      dtype='|S16')

#### Open 'result.csv' through pandas.

In [26]:
# Load the exported slice with pandas; the first line of the file is used as
# the column header.
# Note: this rebinds `df`, replacing the full dataset loaded at the top of
# the notebook.
df = pd.read_csv('result.csv')

In [27]:
# Display the DataFrame loaded from result.csv.
df

Unnamed: 0,street,city,zip,state
0,3526 HIGH ST,SACRAMENTO,95838,CA
1,51 OMAHA CT,SACRAMENTO,95823,CA
2,2796 BRANCH ST,SACRAMENTO,95815,CA
3,2805 JANETTE WAY,SACRAMENTO,95815,CA
4,6001 MCMAHON DR,SACRAMENTO,95824,CA
