# Week8: Orange Scripting Tutorial

#### Zongcheng Chu

In [1]:
!pip install orange3

Collecting orange3
[?25l  Downloading https://files.pythonhosted.org/packages/13/f5/b081bc4b7c6c122efac5a8a1c44a7b320a68724e9e6e5dbf00f8f4bbef54/Orange3-3.24.1-cp36-cp36m-manylinux1_x86_64.whl (2.5MB)
[K     |████████████████████████████████| 2.5MB 3.4MB/s 
Collecting AnyQt>=0.0.8
[?25l  Downloading https://files.pythonhosted.org/packages/ac/6d/8c2623ac14a691fb9e672a116e9988de197c27a9da505a330b961647a740/AnyQt-0.0.10-py2.py3-none-any.whl (45kB)
[K     |████████████████████████████████| 51kB 7.8MB/s 
Collecting pyqtgraph==0.10.0
[?25l  Downloading https://files.pythonhosted.org/packages/cd/ad/307e0280df5c19986c4206d138ec3a8954afc722cea991f4adb4a16337d9/pyqtgraph-0.10.0.tar.gz (1.5MB)
[K     |████████████████████████████████| 1.6MB 36.8MB/s 
Collecting openTSNE>=0.3.11
[?25l  Downloading https://files.pythonhosted.org/packages/55/eb/271a75dab4d5dac7ebc5429f00570c2b4b764e12412517f131761eaf5beb/openTSNE-0.3.12-cp36-cp36m-manylinux1_x86_64.whl (1.4MB)
[K     |███████████████████████

In [0]:
import Orange

In [0]:
# load buit-in dataset "iris"
data = Orange.data.Table("iris")

In [0]:
print(" feture 1     feture 2    feture 3     feature 4     target")
print("-" * 60)
data.domain

 feture 1     feture 2    feture 3     feature 4     target
------------------------------------------------------------


[sepal length, sepal width, petal length, petal width | iris]

In [0]:
data.domain.attributes

(ContinuousVariable(name='sepal length', number_of_decimals=1),
 ContinuousVariable(name='sepal width', number_of_decimals=1),
 ContinuousVariable(name='petal length', number_of_decimals=1),
 ContinuousVariable(name='petal width', number_of_decimals=1))

In [0]:
data.domain.class_var

DiscreteVariable(name='iris',
                 values=['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])

In [0]:
for x in data.domain.attributes:
    print(x)

sepal length
sepal width
petal length
petal width


In [0]:
# let's print the first three rows
for row in data[:3]:
    print(row)

[5.1, 3.5, 1.4, 0.2 | Iris-setosa]
[4.9, 3.0, 1.4, 0.2 | Iris-setosa]
[4.7, 3.2, 1.3, 0.2 | Iris-setosa]


In [0]:
# get the category for each data instance
for d in data[:5]:
    print(d.get_class())

Iris-setosa
Iris-setosa
Iris-setosa
Iris-setosa
Iris-setosa


## explore data domain

domain holds the names of attributes, classes and types

In [0]:
data = Orange.data.Table("iris")
n = len(data.domain.attributes)
print("iris dataset has {} features".format(n))

iris dataset has 4 features


In [0]:
# check if each feature is continuous or discreate
n_cont = sum([1 for a in data.domain.attributes if a.is_continuous])
n_disc = sum([1 for a in data.domain.attributes if a.is_discrete])

print("iris dataset has {} continuous features and {} discrete features".format(n_cont, n_disc))

iris dataset has 4 continuous features and 0 discrete features


In [0]:
# let's print all the features
for feature in data.domain.attributes:
    print(feature.name)
    print(feature)

sepal length
sepal length
sepal width
sepal width
petal length
petal length
petal width
petal width


In [0]:
# or use index to get each feature
n = len(data.domain.attributes)
for i in range(n):
    print(data.domain[i].name)

sepal length
sepal width
petal length
petal width


## get data instances by index

In [0]:
print(data[0])

[5.1, 3.5, 1.4, 0.2 | Iris-setosa]


In [0]:
name = "sepal width"
print("Value of '%s' for the first instance:" % name, data[0][name])

Value of 'sepal width' for the first instance: 3.5


In [0]:
print("The 3rd value of the 1st data instance:", data[0][2])

The 3rd value of the 1st data instance: 1.4


## basic statistics

In [0]:
# MEAN() function receives list-type data
def MEAN(x):
    return sum(x) / len(x)

data = Orange.data.Table("iris")

for x in data.domain.attributes:
    print("mean value for {} attribute is {}".format(x, MEAN([d[x] for d in data])))

mean value for sepal length attribute is 5.843333333333335
mean value for sepal width attribute is 3.0540000000000007
mean value for petal length attribute is 3.7586666666666693
mean value for petal width attribute is 1.1986666666666672


In [0]:
col=[]
for x in data.domain.attributes:
  for d in data:
    col.append(d[x])
  print(MEAN(col))
  col=[]


5.843333333333335
3.0540000000000007
3.7586666666666693
1.1986666666666672


In [0]:
# let's get the total number for each category

data = Orange.data.Table("iris")

dictionary = {}

for d in data:
    if str(d.get_class()) not in dictionary:
        dictionary[str(d.get_class())] = 1
    else:
        dictionary[str(d.get_class())] += 1

print(dictionary)

{'Iris-setosa': 50, 'Iris-versicolor': 50, 'Iris-virginica': 50}


## Orange data and Numpy

In [0]:
# convert orange data to numpy data

data = Orange.data.Table("iris")
data.X[:3] # first 3 rows

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2]])

In [0]:
data.Y[:3] # class
print(data.Y[40:60])

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [0]:
import numpy as np

# an easy way to compute the mean vlaue for each column using numpy
np.mean(data.X, axis=0)

array([5.84333333, 3.054     , 3.75866667, 1.19866667])

In [0]:
# convert numpy data to Orange data

X = np.array([[1,2],[3,4]])
data = Orange.data.Table(X)
print(data.domain)
print(data)

[Feature 1, Feature 2]
[[1, 2],
 [3, 4]]
[[1 2]
 [3 4]]


  This is separate from the ipykernel package so we can avoid doing imports until


In [0]:
# assign names to features

domain = Orange.data.Domain([Orange.data.ContinuousVariable("lenght"),
                                 Orange.data.ContinuousVariable("width")])
data = Orange.data.Table(domain, X)
print(data.domain)

[lenght, width]


## Data sampling

In [0]:
data = Orange.data.Table("iris")
print("Dataset instances:", len(data))

subset = Orange.data.Table(data.domain, [d for d in data if d["petal length"] > 3.0])
print("Subset size:", len(subset))

Dataset instances: 150
Subset size: 99


In [0]:
d_subset = []
for d in data:
  if d["petal length"] > 3.0:
      d_subset.append(d)

# First attempts to machine learning!!!

## classification problem

## step 1:
load data

In [0]:
data = Orange.data.Table("iris")
n = data.X.shape[0]
print(n)

150


## step 2:
split the data into training and testing

In [0]:
import numpy as np

# shuffle the data
permutation = np.random.permutation(n).tolist()
train_idx = permutation[:int(n*0.9)]
test_idx = permutation[int(n*0.9):]

# 90% for training and 10% for testing

train_subset = Orange.data.Table(data.domain, [d for i,d in enumerate(data) if i in train_idx])
test_subset = Orange.data.Table(data.domain, [d for i,d in enumerate(data) if i in test_idx])


print("we have {} train samples".format(train_subset.X.shape[0]))
print("we have {} test samples".format(test_subset.X.shape[0]))

we have 135 train samples
we have 15 test samples


  # Remove the CWD from sys.path while we load stuff.
  # This is added back by InteractiveShellApp.init_path()


## step 3:
create a learning model for classification

In [0]:
model = Orange.classification.LinearSVMLearner()

  """)


In [0]:
# the model tries to learn how to do the classification from the training samples
classifier = model(train_subset)

## step 4:
do testing

In [0]:
# do prediction on test data
predicts = classifier(test_subset)
probabilities = classifier(test_subset, 1)
print(probabilities)

[[1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]]


In [0]:
c_values = data.domain.class_var.values
for pred in predicts:
    print(c_values[int(pred)])

Iris-setosa
Iris-setosa
Iris-setosa
Iris-setosa
Iris-setosa
Iris-setosa
Iris-setosa
Iris-versicolor
Iris-versicolor
Iris-versicolor
Iris-versicolor
Iris-virginica
Iris-virginica
Iris-virginica
Iris-virginica


In [0]:
# True label
for d in test_subset:
    print(d.get_class())

Iris-setosa
Iris-setosa
Iris-setosa
Iris-setosa
Iris-setosa
Iris-setosa
Iris-setosa
Iris-versicolor
Iris-versicolor
Iris-versicolor
Iris-versicolor
Iris-virginica
Iris-virginica
Iris-virginica
Iris-virginica


## Regression Problem

Regression in Orange is very similar to classification. These both require class-labeled data. Just like in classification, regression is implemented with learners and regression models (regressors).

### Step 1:

load housing data

In [0]:
import Orange

data = Orange.data.Table("housing")
print("-------features-------")
for att in data.domain.attributes:
    print(att)
    
print("----------first 5 rows---------")
print(data[:5])

print("--------table length-------")
print(data.X.shape[0])

-------features-------
CRIM
ZN
INDUS
CHAS
NOX
RM
AGE
DIS
RAD
TAX
PTRATIO
B
LSTAT
----------first 5 rows---------
[[0.00632, 18.0, 2.31, 0, 0.5380, 6.575, 65.2, 4.0900, 1, 296, 15.3, 396.90, 4.98 | 24.0],
 [0.02731, 0.0, 7.07, 0, 0.4690, 6.421, 78.9, 4.9671, 2, 242, 17.8, 396.90, 9.14 | 21.6],
 [0.02729, 0.0, 7.07, 0, 0.4690, 7.185, 61.1, 4.9671, 2, 242, 17.8, 392.83, 4.03 | 34.7],
 [0.03237, 0.0, 2.18, 0, 0.4580, 6.998, 45.8, 6.0622, 3, 222, 18.7, 394.63, 2.94 | 33.4],
 [0.06905, 0.0, 2.18, 0, 0.4580, 7.147, 54.2, 6.0622, 3, 222, 18.7, 396.90, 5.33 | 36.2]]
--------table length-------
506


## Step 2:

split the data into training and testing

In [0]:
import numpy as np

n = 506
# shuffle the data
permutation = np.random.permutation(n).tolist()
train_idx = permutation[:int(n*0.9)]
test_idx = permutation[int(n*0.9):]

# 90% for training and 10% for testing

train_subset = Orange.data.Table(data.domain, [d for i,d in enumerate(data) if i in train_idx])
test_subset = Orange.data.Table(data.domain, [d for i,d in enumerate(data) if i in test_idx])


print("we have {} train samples".format(train_subset.X.shape[0]))
print("we have {} test samples".format(test_subset.X.shape[0]))

we have 455 train samples
we have 51 test samples


## Step 3:

create a learning model for regression

In [0]:
lin = Orange.regression.linear.LinearRegressionLearner()
rf = Orange.regression.random_forest.RandomForestRegressionLearner()

regressor_lin = lin(train_subset)
regressor_rf = rf(train_subset)

## Step 4:

make prediction

In [0]:
print("True housing price   ", "\t  linreg   ", "\t \t  rf")

for d in test_subset:
    print(d.get_class(),"\t \t ", regressor_lin(d), "\t ", regressor_rf(d))

True housing price    	  linreg    	 	  rf
20.4 	 	  19.817276956609106 	  20.230000000000004
18.4 	 	  19.722948336313994 	  19.0
24.7 	 	  24.93021106690206 	  22.270000000000003
20.9 	 	  20.7625702933054 	  21.869999999999997
23.4 	 	  23.758221171962408 	  23.779999999999998
20.3 	 	  22.243489736159916 	  20.979999999999997
24.8 	 	  25.9566115422511 	  22.859999999999996
22.9 	 	  25.080573271204855 	  22.4
22.5 	 	  21.871254970150254 	  21.57
21.2 	 	  23.12186910676722 	  20.279999999999998
23.0 	 	  20.09134928350198 	  18.82
14.4 	 	  2.699363779599082 	  14.239999999999998
21.5 	 	  20.818844580085184 	  19.740000000000002
22.7 	 	  24.86606450076908 	  20.940000000000005
23.8 	 	  26.37257085023996 	  21.28
32.5 	 	  31.410574196667277 	  28.169999999999998
42.3 	 	  36.53734278518792 	  46.459999999999994
24.4 	 	  23.580531008590675 	  22.07
21.7 	 	  22.3287405455444 	  19.17
28.1 	 	  24.957551065997986 	  24.130000000000003
23.7 	 	  9.455705743260815 	  17.22
28.7 	

## Python Scripts in Orange

**Inputs**

*  Data (Orange.data.Table): input dataset bound to in_data variable

*  Learner (Orange.classification.Learner): input learner bound to in_learner variable

*  Classifier (Orange.classification.Learner): input classifier bound to in_classifier variable

*  Object: input Python object bound to in_object variable

**Outputs**

*  Data (Orange.data.Table): dataset retrieved from out_data variable

*  Learner (Orange.classification.Learner): learner retrieved from out_learner variable

*  Classifier (Orange.classification.Learner): classifier retrieved from out_classifier variable

*  Object: Python object retrieved from out_object variable

Python Script widget can be used to run a python script in the input, when a suitable functionality is not implemented in an existing widget. The script has **in_data**, **in_distance**, **in_learner**, **in_classifier** and **in_object** variables (from input signals) in its local namespace

For instance the following script would simply pass on all signals it receives:

<br>

out_data = in_data<br>
out_distance = in_distance<br>
out_learner = in_learner<br>
out_classifier = in_classifier<br>
out_object = in_object<br>

In [0]:
# let's first load iris data in Orange and print out the data table length

print(len(in_data))

In [0]:
# create a subset and output the new subset data

import Orange

subset = Orange.data.Table(in_data.domain, [d for d in in_data if d["petal length"] > 3.0])
out_data = subset

In [0]:
# compute the mean, max, min, std, var for each column
import numpy as np
new_data = in_data.X

print(np.max(new_data, axis=0))
print(np.min(new_data, axis=0))
print(np.std(new_data, axis=0))
print(np.var(new_data, axis=0))

# Group Project

we are going to analyze "titanic" dataset this week.<br>

for this dataset, we have each passenger's status, age and sex. Based on this information, we are going to predict wether a person survived in the crash.

In [0]:
# 1. load data (1pts)
import Orange
import numpy as np
import pandas as pd

data = Orange.data.Table("titanic")

In [4]:
#2. print out the how many records do we have in this dataset. (1 pts)
n = data.X.shape[0]
print(n)

2201


In [5]:
#3. report how many categories do we have for each column(status, age and sex)
# you need to program to get the answer (2 pts)
count = 0;

for feature in data.domain.attributes:
  count += 1
  print(feature.name)
print("There are",count,"columns.")

status
age
sex
There are 3 columns.


In [6]:
#4. convert to numerical representation (3 pts)
#for example
# female -> 0 male ->1
# adult -> 0 child->1
# first -> 0 second -> 1 third->2 crew -> 3

odata = Orange.data.Table("titanic")
odata = data.X
print(odata)

#status: 1:first, 2: second, 3:third, 0:crew
#age: 1:child, 0:adult
#sex: 1:male, 0:female




[[1. 0. 1.]
 [1. 0. 1.]
 [1. 0. 1.]
 ...
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [7]:
#5. save your new data in Orange Table (1 pts)

odata = Orange.data.Table(odata)
print(odata.domain)
print(odata)

[Feature 1, Feature 2, Feature 3]
[[1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0, 1],
 [1, 0

  


In [8]:
#6. randomly shuffle the dataset and split it into training data and testing data (2 pts)
# training : testing = 9:1

n=data.X.shape[0]

permutation = np.random.permutation(n).tolist()
predict_idx = permutation[:int(n*0.9)]
test_idx = permutation[int(n*0.9):]


predict_subset = Orange.data.Table(data.domain, [d for i,d in enumerate(data) if i in predict_idx])
test_subset = Orange.data.Table(data.domain, [d for i,d in enumerate(data) if i in test_idx])


print("we have {} predict samples".format(predict_subset.X.shape[0]))
print("we have {} test samples".format(test_subset.X.shape[0]))

we have 1980 predict samples
we have 221 test samples


  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.


1


In [9]:
#7. create two classification models and train your models on the training dataset (2 pts)

model = Orange.classification.LinearSVMLearner()

classifier=model(predict_subset)

  """)


In [10]:
#8. Make prediction using your trained models on testing dataset.(2 pts)

predicts = classifier(test_subset)
probabilities = classifier(test_subset, 1)
print(probabilities)


[[1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 

In [0]:

for d in test_subset:
    print(d.get_class())

yes
yes
yes
yes
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
no
no
no
no
no
no
no
no
no
no
no
no
no
no
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
yes
yes
yes
yes
yes
yes
yes
yes
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
yes
no
no
no
no
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
yes
no


In [59]:
#9. report your prediction accuracy for each model. (2 pts)
# accuracy = number of right prediction / total testing samples
correct = data[test_idx]
right = 0
total = 0
for x in probabilities:
  if correct[x][3] is "yes" and probabilities[x][0] is 1.0:
    right += 1
  elif correct[x][3] is "yes" and probabilities[x][1] is 0.0:
    right += 1
  total +=1
print (right/total)


IndexError: ignored

In [56]:
correct = data[test_idx]
if correct[2][3] is 'Value(\'survived\', yes)':
  print (hi)


print(correct[0][3])

probabilities[0][1]

no


0.0

In [11]:
#Same as before, but for Logrithmic Test

import Orange
import numpy as np
import pandas as pd



data = Orange.data.Table("titanic")
odata = data.X
data = Orange.data.Table(odata)

n = data.X.shape[0]
# shuffle the data
permutation = np.random.permutation(n).tolist()
predict_idx = permutation[:int(n*0.9)]
test_idx = permutation[int(n*0.9):]


predict_subset = Orange.data.Table(data.domain, [d for i,d in enumerate(data) if i in predict_idx])
test_subset = Orange.data.Table(data.domain, [d for i,d in enumerate(data) if i in test_idx])


print("we have {} train samples".format(predict_subset.X.shape[0]))
print("we have {} test samples".format(test_subset.X.shape[0]))

lin = Orange.regression.linear.LinearRegressionLearner()
rf = Orange.regression.random_forest.RandomForestRegressionLearner()

regressor_lin = lin(predict_subset)
regressor_rf = rf(predict_subset)

for d in test_subset:
    print(d.get_class(),"\t \t ", regressor_lin(d), "\t ", regressor_rf(d))

  # Remove the CWD from sys.path while we load stuff.


we have 1980 train samples
we have 221 test samples


ValueError: ignored