[Reading the Dataset](#reading_the_dataset)<br>
[Handling Missing Data](#handling_missing_data)<br>
[Conversion to the Tensor Format](#tensor_format_conversion)

---

### Reading the Dataset
<a id = 'reading_the_dataset'></a>

In [32]:
import os
import tensorflow as tf

def mkdir_if_not_exist(path):
    """Create the directory `path` (and any missing parents) if it does not exist.

    Args:
        path: Either a single path (``str``, ``bytes`` or ``os.PathLike``) or an
            iterable of path components, which are joined with ``os.path.join``.

    Raises:
        FileExistsError: If `path` already exists but is not a directory.
    """
    # Anything that is not already a single path is treated as a sequence of
    # components; path-like objects must not be unpacked (they are not iterable).
    if not isinstance(path, (str, bytes, os.PathLike)):
        path = os.path.join(*path)
    # exist_ok=True makes the call idempotent and avoids the check-then-create
    # race of testing os.path.exists() before calling os.makedirs().
    os.makedirs(path, exist_ok=True)

In [33]:
data_file = '../data/house_tiny.csv'
# Create the CSV's parent directory, derived from data_file so the two paths
# cannot drift apart.
mkdir_if_not_exist(os.path.dirname(data_file))
with open(data_file, 'w') as f:
    # Header row. No padding around the names: stray spaces would become part
    # of the parsed column labels (e.g. ' Price ' instead of 'Price').
    f.write('NumRooms,Alley,Price\n')
    # Each data row is one sample; the literal 'NA' marks a missing value.
    f.write('NA,Pave,127500\n')
    f.write('2,NA,106000\n')
    f.write('4,NA,178100\n')
    f.write('NA,NA,140000\n')

In [34]:
import pandas as pd

# Load the CSV written above into a DataFrame. Strip surrounding whitespace
# from the header names so columns can be selected by their clean names
# (e.g. data['Price']) even if the header row contains padding.
data = pd.read_csv(data_file).rename(columns=str.strip)
print(data)

   NumRooms Alley   Price 
0       NaN  Pave   127500
1       2.0   NaN   106000
2       4.0   NaN   178100
3       NaN   NaN   140000


---

### Handling Missing Data
<a id= 'handling_missing_data'></a>

In [35]:
# Split by position: the first two columns are the features, the third is the target.
inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]
# Replace missing numeric entries with their column mean. numeric_only=True
# restricts the mean to numeric columns: the string-valued 'Alley' column has no
# mean (pandas >= 2.0 raises TypeError when asked for one), so its NaNs are left
# untouched by this step.
inputs = inputs.fillna(inputs.mean(numeric_only=True))
print(inputs)

   NumRooms Alley
0       3.0  Pave
1       2.0   NaN
2       4.0   NaN
3       3.0   NaN


In [36]:
# One-hot encode categorical columns such as 'Alley'. With dummy_na=True, NaN is
# treated as a category of its own, so each row gets an indicator for every
# observed value ('Alley_Pave') plus one for "missing" ('Alley_nan').
# dtype=int keeps the indicators as 0/1 integers; newer pandas versions default
# to bool, which would not mix cleanly with the float 'NumRooms' column when the
# frame is converted to a single numeric array.
inputs = pd.get_dummies(inputs, dummy_na=True, dtype=int)
print(inputs)

   NumRooms  Alley_Pave  Alley_nan
0       3.0           1          0
1       2.0           0          1
2       4.0           0          1
3       3.0           0          1


---

### Conversion to the Tensor Format
<a id='tensor_format_conversion'></a>

In [37]:
# Convert the preprocessed features and the target to TensorFlow tensors.
# to_numpy(dtype=float) guarantees a homogeneous float array for the features;
# a frame mixing float and bool/int columns could otherwise yield an object
# array, which tf.constant cannot convert.
X, y = tf.constant(inputs.to_numpy(dtype=float)), tf.constant(outputs.to_numpy())
X, y

(<tf.Tensor: shape=(4, 3), dtype=float64, numpy=
 array([[3., 1., 0.],
        [2., 0., 1.],
        [4., 0., 1.],
        [3., 0., 1.]])>,
 <tf.Tensor: shape=(4,), dtype=int64, numpy=array([127500, 106000, 178100, 140000], dtype=int64)>)