+

hse-aml · Jun 13, 2020 · f8f63a7 · f8f63a7
1 parent 988231d
commit f8f63a7
Show file tree

Hide file tree

Showing 25 changed files with 81 additions and 33 deletions.
diff --git a/Programming assignment, week 1: Pandas basics/PandasBasics.ipynb b/Programming assignment, week 1: Pandas basics/PandasBasics.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Version 1.0.1"
+    "Version 1.0.3"
    ]
   },
   {
@@ -139,7 +139,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# YOUR CODE GOES HERE\n",
@@ -170,7 +172,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# YOUR CODE GOES HERE\n",
@@ -216,17 +220,20 @@
    "metadata": {},
    "source": [
     "<ol start=\"4\">\n",
-    "  <li><b>What was the variance of the number of sold items per day sequence for the shop with `shop_id = 25` in December, 2014?</b></li>\n",
+    "  <li><b>What was the variance of the number of sold items per day sequence for the shop with `shop_id = 25` in December, 2014? Do not count items that were sold but later returned.</b></li>\n",
     "</ol>\n",
     "\n",
     "* Fill `total_num_items_sold` and `days` arrays, and plot the sequence with the code below.\n",
-    "* Then compute variance. Remember, there can be differences in how you normalize variance (biased or unbiased estimate, see [link](https://math.stackexchange.com/questions/496627/the-difference-between-unbiased-biased-estimator-variance)). Compute ***unbiased*** estimate (use the right value for `ddof` argument in `pd.var` or `np.var`)."
+    "* Then compute variance. Remember, there can be differences in how you normalize variance (biased or unbiased estimate, see [link](https://math.stackexchange.com/questions/496627/the-difference-between-unbiased-biased-estimator-variance)). Compute ***unbiased*** estimate (use the right value for `ddof` argument in `pd.Series.var` or `np.var`). \n",
+    "* If there were no sales on a given day, ***do not*** impute the missing value with zero; just ignore that day."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "shop_id = 25\n",
@@ -269,7 +276,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "grader.submit(STUDENT_EMAIL, STUDENT_TOKEN)"
@@ -300,7 +309,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.0"
+   "version": "3.6.2"
   }
  },
  "nbformat": 4,

diff --git a/Programming assignment, week 1: Pandas basics/grader.py b/Programming assignment, week 1: Pandas basics/grader.py
@@ -21,7 +21,7 @@ def almostEqual(x, y):
 
 class Grader(object):
     def __init__(self):
-        self.submission_page = 'https://www.coursera.org/api/onDemandProgrammingScriptSubmissions.v1'
+        self.submission_page = 'https://hub.coursera-apps.org/api/onDemandProgrammingScriptSubmissions.v1'
         self.assignment_key = 'S1UqVXp-EeelpgpYPAO2Og'
         self.parts = OrderedDict([
                     ('edAEq', 'max_revenue'),

diff --git a/Programming assignment, week 2: Data leakages/Data leakages.ipynb b/Programming assignment, week 2: Data leakages/Data leakages.ipynb
diff --git a/Programming assignment, week 3: Mean encodings/Programming_assignment_week_3.ipynb b/Programming assignment, week 3: Mean encodings/Programming_assignment_week_3.ipynb
@@ -36,7 +36,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "import pandas as pd\n",
@@ -55,7 +57,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "sales = pd.read_csv('../readonly/final_project_data/sales_train.csv.gz')"
@@ -78,7 +82,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "index_cols = ['shop_id', 'item_id', 'date_block_num']\n",
@@ -131,6 +137,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
+    "collapsed": true,
     "scrolled": true
    },
    "outputs": [],
@@ -159,7 +166,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "'''\n",
@@ -187,7 +196,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "grader = Grader()"
@@ -226,7 +237,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# YOUR CODE GOES HERE\n",
@@ -265,7 +278,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# YOUR CODE GOES HERE\n",
@@ -299,7 +314,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# YOUR CODE GOES HERE\n",
@@ -335,7 +352,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# YOUR CODE GOES HERE\n",
@@ -356,7 +375,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "STUDENT_EMAIL = # EMAIL HERE\n",
@@ -367,7 +388,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "grader.submit(STUDENT_EMAIL, STUDENT_TOKEN)"

diff --git a/Programming assignment, week 3: Mean encodings/grader.py b/Programming assignment, week 3: Mean encodings/grader.py
@@ -21,7 +21,7 @@ def almostEqual(x, y):
 
 class Grader(object):
     def __init__(self):
-        self.submission_page = 'https://www.coursera.org/api/onDemandProgrammingScriptSubmissions.v1'
+        self.submission_page = 'https://hub.coursera-apps.org/api/onDemandProgrammingScriptSubmissions.v1'
         self.assignment_key = 'JVyZjZIaEeeXtQpjLCk-0A'
         self.parts = OrderedDict([
                     ('9zPRY', 'KFold_scheme'),

diff --git a/Programming assignment, week 4: Ensembles/Programming_assignment_week_4.ipynb b/Programming assignment, week 4: Ensembles/Programming_assignment_week_4.ipynb
diff --git a/Programming assignment, week 4: Ensembles/grader.py b/Programming assignment, week 4: Ensembles/grader.py
@@ -21,7 +21,7 @@ def almostEqual(x, y):
 
 class Grader(object):
     def __init__(self):
-        self.submission_page = 'https://www.coursera.org/api/onDemandProgrammingScriptSubmissions.v1'
+        self.submission_page = 'https://hub.coursera-apps.org/api/onDemandProgrammingScriptSubmissions.v1'
         self.assignment_key = 'Lhay-55JEeet3xIBvGMumA'
         self.parts = OrderedDict([
                         ('EyiFH', 'best_alpha'),

diff --git a/Programming assignment, week 4: KNN features/compute_KNN_features.ipynb b/Programming assignment, week 4: KNN features/compute_KNN_features.ipynb
@@ -44,7 +44,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "import numpy as np\n",
@@ -87,7 +89,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "train_path = '../readonly/KNN_features_data/X.npz'\n",
@@ -120,7 +124,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "from sklearn.base import BaseEstimator, ClassifierMixin\n",
@@ -354,7 +360,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# a list of K in KNN, starts with one \n",
@@ -373,9 +381,9 @@
     "test_knn_feats = NNF.predict(X_test[:50])\n",
     "\n",
     "# This should be zero\n",
-    "print ('Deviation from ground thruth features: %f' % np.abs(test_knn_feats - true_knn_feats_first50[44:45]).sum())\n",
+    "print ('Deviation from ground thruth features: %f' % np.abs(test_knn_feats - true_knn_feats_first50).sum())\n",
     "\n",
-    "deviation =np.abs(test_knn_feats - true_knn_feats_first50[44:45]).sum(0)\n",
+    "deviation =np.abs(test_knn_feats - true_knn_feats_first50).sum(0)\n",
     "for m in np.where(deviation > 1e-3)[0]: \n",
     "    p = np.where(np.array([87, 88, 117, 146, 152, 239]) > m)[0][0]\n",
     "    print ('There is a problem in feature %d, which is a part of section %d.' % (m, p + 1))"
@@ -405,7 +413,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "for metric in ['minkowski', 'cosine']:\n",
@@ -441,7 +451,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# Differently from other homework we will not implement OOF predictions ourselves\n",
@@ -485,7 +497,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "s = 0\n",
@@ -509,7 +523,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "from grader import Grader\n",

diff --git a/Programming assignment, week 4: KNN features/grader.py b/Programming assignment, week 4: KNN features/grader.py
@@ -21,7 +21,7 @@ def almostEqual(x, y):
 
 class Grader(object):
     def __init__(self):
-        self.submission_page = 'https://www.coursera.org/api/onDemandProgrammingScriptSubmissions.v1'
+        self.submission_page = 'https://hub.coursera-apps.org/api/onDemandProgrammingScriptSubmissions.v1'
         self.assignment_key = 'r2N4iqFlEeeRFQqEddeEzg'
         self.parts = OrderedDict([
                     ('1O8kU', 'statistic')])

diff --git a/README.md b/README.md
diff --git a/Reading materials/EDA_Springleaf_screencast.ipynb b/Reading materials/EDA_Springleaf_screencast.ipynb
diff --git a/Reading materials/EDA_video2.ipynb b/Reading materials/EDA_video2.ipynb
diff --git a/Reading materials/EDA_video3_screencast.ipynb b/Reading materials/EDA_video3_screencast.ipynb
diff --git a/Reading materials/GBM_drop_tree.ipynb b/Reading materials/GBM_drop_tree.ipynb
diff --git a/Reading materials/Hyperparameters_tuning_video2_RF_n_estimators.ipynb b/Reading materials/Hyperparameters_tuning_video2_RF_n_estimators.ipynb
diff --git a/Reading materials/Macros.ipynb b/Reading materials/Macros.ipynb
diff --git a/Reading materials/Metrics_video2_constants_for_MSE_and_MAE.ipynb b/Reading materials/Metrics_video2_constants_for_MSE_and_MAE.ipynb
diff --git a/Reading materials/Metrics_video3_weighted_median.ipynb b/Reading materials/Metrics_video3_weighted_median.ipynb
diff --git a/Reading materials/Metrics_video8_soft_kappa_xgboost.ipynb b/Reading materials/Metrics_video8_soft_kappa_xgboost.ipynb
diff --git a/readonly/KNN_features_data/X.npz b/readonly/KNN_features_data/X.npz
diff --git a/readonly/KNN_features_data/X_test.npz b/readonly/KNN_features_data/X_test.npz
diff --git a/readonly/KNN_features_data/Y.npy b/readonly/KNN_features_data/Y.npy
diff --git a/readonly/KNN_features_data/Y_test.npy b/readonly/KNN_features_data/Y_test.npy
diff --git a/readonly/KNN_features_data/knn_feats_test_first50.npy b/readonly/KNN_features_data/knn_feats_test_first50.npy
diff --git a/readonly/data_leakages_data/test_pairs.csv b/readonly/data_leakages_data/test_pairs.csv