Skip to content

Commit

Permalink
writing results and testing are left
Browse files Browse the repository at this point in the history
  • Loading branch information
aboelhamd committed Jul 27, 2019
1 parent 6845db4 commit 0a0a161
Showing 1 changed file with 56 additions and 111 deletions.
167 changes: 56 additions & 111 deletions loadmodels.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -15458,8 +15458,8 @@
"\n",
"for file in files:\n",
" # These are the classifiers that permit training data with sample weights!\n",
" names = [\"NaiveBayes\", \"LinearSVM\", \"RBFSVM\", \"DecisionTree\",\n",
" \"RandomForest\", \"AdaBoost\"]\n",
"# names = [\"NaiveBayes\", \"LinearSVM\", \"RBFSVM\", \"DecisionTree\",\n",
"# \"RandomForest\", \"AdaBoost\"]\n",
" \n",
" print(\"file name :\", file)\n",
" data = pd.read_csv(files[file], delimiter=r\"\\s+\", header=None).dropna()\n",
Expand All @@ -15469,39 +15469,33 @@
" enc = joblib.load('models/'+'encoder'+'-'+file[:-4])\n",
" # remove records containing an unseen word (0 will always be returned for such a record)\n",
" # this will be solved later\n",
" unseen = []\n",
" for i in range(len(data.values)) :\n",
" for j in range(len(data.values[i])) :\n",
" if data.values[i][j] not in enc.categories_[j] :\n",
" unseen.append(data.values[i])\n",
" \n",
" \n",
" features = enc.fit_transform(data.iloc[:,2:])\n",
"# display(enc.categories_)\n",
"# display(data.iloc[:,2:],features)\n",
" # target and weights\n",
" target = data.iloc[:,0]\n",
" weights = data.iloc[:,1].values\n",
" seen = [x for x in data.values if x not in unseen]\n",
"\n",
" samples = enc.transform(seen)\n",
" \n",
"# print(\"file name :\", file)\n",
" print(\"Rules(classes) number :\",target.nunique())\n",
" print(\"Words(features) number :\",features.shape[1])\n",
" print(\"Records number :\",features.shape[0], end = '')\n",
" display(data.iloc[:target.nunique(),:])\n",
"# print(\"Rules(classes) number :\",target.nunique())\n",
" print(\"Words(features) number :\",samples.shape[1])\n",
" print(\"Records number :\",samples.shape[0], end = '')\n",
"# display(data.iloc[:target.nunique(),:])\n",
" \n",
" # split to train and test\n",
" X_train, X_test, y_train, y_test, w_train, w_test = \\\n",
" train_test_split(features, target, weights, test_size=.5, random_state=0, stratify=target)\n",
"# display(features, target, weights)\n",
"# display(X_train, X_test, y_train, y_test, w_train, w_test)\n",
" \n",
" # train models and print their scores\n",
" for name in names:\n",
" print(\"model :\", name, \",\", end = '')\n",
" modelname = 'sklearn-models/'+name+'-'+file[:-4]+'.model'\n",
" loaded_model = joblib.load(modelname)\n",
" score = loaded_model.score(X=X_test, y=y_test, sample_weight=w_test)\n",
" print(\" score =\", score)\n",
" \n",
" # save models\n",
"# name+'-'+file[:-4]+'.model'\n",
"# modelname = 'sklearn-models/'+name+'-'+file[:-4]+'.model'\n",
"# joblib.dump(clf, filename)\n",
" # predict using the SVM model\n",
"# print(\"model :\", name, \",\", end = '')\n",
" name = 'LinearSVM'\n",
" modelname = 'sklearn-models/'+name+'-'+file[:-4]+'.model'\n",
" loaded_model = joblib.load(modelname)\n",
" rules = loaded_model.predict(samples)\n",
" \n",
" # write results to file\n",
" \n",
" print(\"----------------------------------------------\\n\")\n"
]
},
Expand Down Expand Up @@ -15551,95 +15545,46 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>rule</td>\n",
" <td>weight</td>\n",
" <td>word1</td>\n",
" <td>word2</td>\n",
" <td>word3</td>\n",
" <td>word4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>0.314649</td>\n",
" <td>lo</td>\n",
" <td>poder</td>\n",
" <td>haber</td>\n",
" <td>ser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>0.342676</td>\n",
" <td>lo</td>\n",
" <td>poder</td>\n",
" <td>haber</td>\n",
" <td>ser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2</td>\n",
" <td>0.342676</td>\n",
" <td>lo</td>\n",
" <td>poder</td>\n",
" <td>haber</td>\n",
" <td>ser</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5\n",
"0 rule weight word1 word2 word3 word4\n",
"1 0 0.314649 lo poder haber ser\n",
"2 1 0.342676 lo poder haber ser\n",
"3 2 0.342676 lo poder haber ser"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
"name": "stdout",
"output_type": "stream",
"text": [
"rule\n",
"weight\n",
"word1\n",
"word2\n",
"word3\n",
"word4\n",
"0\n",
"0.314649\n",
"lo\n",
"poder\n",
"haber\n",
"ser\n",
"1\n",
"0.342676\n",
"lo\n",
"poder\n",
"haber\n",
"ser\n",
"2\n",
"0.342676\n",
"lo\n",
"poder\n",
"haber\n",
"ser\n"
]
}
],
"source": [
"data = pd.read_csv(files[file], delimiter=r\"\\s+\", header=None).dropna()\n",
"data"
"data\n",
"for i in range(len(data.values)) :\n",
" for j in range(len(data.values[i])) :\n",
" print(data.values[i][j])"
]
},
{
Expand Down

0 comments on commit 0a0a161

Please sign in to comment.