Writing results and testing are still to be done

aboelhamd · Jul 27, 2019 · 0a0a161 · 0a0a161
1 parent 6845db4
commit 0a0a161
Showing 1 changed file with 56 additions and 111 deletions.
diff --git a/loadmodels.ipynb b/loadmodels.ipynb
@@ -15458,8 +15458,8 @@
     "\n",
     "for file in files:\n",
     "    # These are the classifiers that permit training data with sample weights!\n",
-    "    names = [\"NaiveBayes\", \"LinearSVM\", \"RBFSVM\", \"DecisionTree\",\n",
-    "         \"RandomForest\", \"AdaBoost\"]\n",
+    "#     names = [\"NaiveBayes\", \"LinearSVM\", \"RBFSVM\", \"DecisionTree\",\n",
+    "#          \"RandomForest\", \"AdaBoost\"]\n",
     "    \n",
     "    print(\"file name :\", file)\n",
     "    data = pd.read_csv(files[file], delimiter=r\"\\s+\", header=None).dropna()\n",
@@ -15469,39 +15469,33 @@
     "    enc = joblib.load('models/'+'encoder'+'-'+file[:-4])\n",
     "    # remove records with unseen word, will return always 0 for that record\n",
     "    # this will be solved later\n",
+    "    rows = data.values\n",
+    "    known = [set(cats) for cats in enc.categories_]\n",
+    "    # a row is usable only if every word is in the encoder's category for its column\n",
+    "    is_seen = [all(w in known[j] for j, w in enumerate(r)) for r in rows]\n",
+    "    unseen = [r for r, ok in zip(rows, is_seen) if not ok]\n",
     "    \n",
-    "    \n",
-    "    features = enc.fit_transform(data.iloc[:,2:])\n",
-    "#     display(enc.categories_)\n",
-    "#     display(data.iloc[:,2:],features)\n",
-    "    # target and weights\n",
-    "    target = data.iloc[:,0]\n",
-    "    weights = data.iloc[:,1].values\n",
+    "    seen = [r for r, ok in zip(rows, is_seen) if ok]\n",
+    "    # NOTE(review): rows are matched column-for-column against enc.categories_ - confirm the input has no extra leading columns\n",
+    "    samples = enc.transform(seen)\n",
     "    \n",
     "#     print(\"file name :\", file)\n",
-    "    print(\"Rules(classes) number :\",target.nunique())\n",
-    "    print(\"Words(features) number :\",features.shape[1])\n",
-    "    print(\"Records number :\",features.shape[0], end = '')\n",
-    "    display(data.iloc[:target.nunique(),:])\n",
+    "#     print(\"Rules(classes) number :\",target.nunique())\n",
+    "    print(\"Words(features) number :\",samples.shape[1])\n",
+    "    print(\"Records number :\",samples.shape[0], end = '')\n",
+    "#     display(data.iloc[:target.nunique(),:])\n",
     "    \n",
-    "    # split to train and test\n",
-    "    X_train, X_test, y_train, y_test, w_train, w_test = \\\n",
-    "        train_test_split(features, target, weights, test_size=.5, random_state=0, stratify=target)\n",
     "#     display(features, target, weights)\n",
-    "#     display(X_train, X_test, y_train, y_test, w_train, w_test)\n",
     "    \n",
-    "    # train models and print their scores\n",
-    "    for name in names:\n",
-    "        print(\"model :\", name, \",\", end = '')\n",
-    "        modelname = 'sklearn-models/'+name+'-'+file[:-4]+'.model'\n",
-    "        loaded_model = joblib.load(modelname)\n",
-    "        score = loaded_model.score(X=X_test, y=y_test, sample_weight=w_test)\n",
-    "        print(\" score =\", score)\n",
-    "        \n",
-    "        # save models\n",
-    "#         name+'-'+file[:-4]+'.model'\n",
-    "#         modelname = 'sklearn-models/'+name+'-'+file[:-4]+'.model'\n",
-    "#         joblib.dump(clf, filename)\n",
+    "    # prediction by using svm\n",
+    "#     print(\"model :\", name, \",\", end = '')\n",
+    "    name = 'LinearSVM'\n",
+    "    modelname = 'sklearn-models/'+name+'-'+file[:-4]+'.model'\n",
+    "    loaded_model = joblib.load(modelname)\n",
+    "    rules = loaded_model.predict(samples)\n",
+    "    \n",
+    "    # write results in file\n",
+    "    \n",
     "    print(\"----------------------------------------------\\n\")\n"
    ]
   },
@@ -15551,95 +15545,46 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 72,
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>0</th>\n",
-       "      <th>1</th>\n",
-       "      <th>2</th>\n",
-       "      <th>3</th>\n",
-       "      <th>4</th>\n",
-       "      <th>5</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>rule</td>\n",
-       "      <td>weight</td>\n",
-       "      <td>word1</td>\n",
-       "      <td>word2</td>\n",
-       "      <td>word3</td>\n",
-       "      <td>word4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>0</td>\n",
-       "      <td>0.314649</td>\n",
-       "      <td>lo</td>\n",
-       "      <td>poder</td>\n",
-       "      <td>haber</td>\n",
-       "      <td>ser</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>1</td>\n",
-       "      <td>0.342676</td>\n",
-       "      <td>lo</td>\n",
-       "      <td>poder</td>\n",
-       "      <td>haber</td>\n",
-       "      <td>ser</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>2</td>\n",
-       "      <td>0.342676</td>\n",
-       "      <td>lo</td>\n",
-       "      <td>poder</td>\n",
-       "      <td>haber</td>\n",
-       "      <td>ser</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "      0         1      2      3      4      5\n",
-       "0  rule    weight  word1  word2  word3  word4\n",
-       "1     0  0.314649     lo  poder  haber    ser\n",
-       "2     1  0.342676     lo  poder  haber    ser\n",
-       "3     2  0.342676     lo  poder  haber    ser"
-      ]
-     },
-     "execution_count": 49,
-     "metadata": {},
-     "output_type": "execute_result"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "rule\n",
+      "weight\n",
+      "word1\n",
+      "word2\n",
+      "word3\n",
+      "word4\n",
+      "0\n",
+      "0.314649\n",
+      "lo\n",
+      "poder\n",
+      "haber\n",
+      "ser\n",
+      "1\n",
+      "0.342676\n",
+      "lo\n",
+      "poder\n",
+      "haber\n",
+      "ser\n",
+      "2\n",
+      "0.342676\n",
+      "lo\n",
+      "poder\n",
+      "haber\n",
+      "ser\n"
+     ]
     }
    ],
    "source": [
     "data = pd.read_csv(files[file], delimiter=r\"\\s+\", header=None).dropna()\n",
-    "data"
+    "# dump every value of the frame, one per line, in row-major order\n",
+    "for row in data.values:\n",
+    "    for value in row:\n",
+    "        print(value)"
    ]
   },
   {