Skip to content

Commit

Permalink
+
Browse files Browse the repository at this point in the history
  • Loading branch information
DmitryUlyanov committed Jun 13, 2020
1 parent 988231d commit f8f63a7
Show file tree
Hide file tree
Showing 25 changed files with 81 additions and 33 deletions.
25 changes: 17 additions & 8 deletions Programming assignment, week 1: Pandas basics/PandasBasics.ipynb 100644 → 100755
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Version 1.0.1"
"Version 1.0.3"
]
},
{
Expand Down Expand Up @@ -139,7 +139,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# YOUR CODE GOES HERE\n",
Expand Down Expand Up @@ -170,7 +172,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# YOUR CODE GOES HERE\n",
Expand Down Expand Up @@ -216,17 +220,20 @@
"metadata": {},
"source": [
"<ol start=\"4\">\n",
" <li><b>What was the variance of the number of sold items per day sequence for the shop with `shop_id = 25` in December, 2014?</b></li>\n",
 <li><b>What was the variance of the number of sold items per day sequence for the shop with `shop_id = 25` in December, 2014? Do not count items that were sold but later returned.</b></li>\n",
"</ol>\n",
"\n",
"* Fill `total_num_items_sold` and `days` arrays, and plot the sequence with the code below.\n",
"* Then compute variance. Remember, there can be differences in how you normalize variance (biased or unbiased estimate, see [link](https://math.stackexchange.com/questions/496627/the-difference-between-unbiased-biased-estimator-variance)). Compute ***unbiased*** estimate (use the right value for `ddof` argument in `pd.var` or `np.var`)."
"* Then compute variance. Remember, there can be differences in how you normalize variance (biased or unbiased estimate, see [link](https://math.stackexchange.com/questions/496627/the-difference-between-unbiased-biased-estimator-variance)). Compute the ***unbiased*** estimate (use the right value for the `ddof` argument in `pd.Series.var` or `np.var`). \n",
"* If there were no sales on a given day, ***do not*** impute the missing value with zero; just ignore that day."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"shop_id = 25\n",
Expand Down Expand Up @@ -269,7 +276,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"grader.submit(STUDENT_EMAIL, STUDENT_TOKEN)"
Expand Down Expand Up @@ -300,7 +309,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
"version": "3.6.2"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion Programming assignment, week 1: Pandas basics/grader.py 100644 → 100755
Expand Up @@ -21,7 +21,7 @@ def almostEqual(x, y):

class Grader(object):
def __init__(self):
self.submission_page = 'https://www.coursera.org/api/onDemandProgrammingScriptSubmissions.v1'
self.submission_page = 'https://hub.coursera-apps.org/api/onDemandProgrammingScriptSubmissions.v1'
self.assignment_key = 'S1UqVXp-EeelpgpYPAO2Og'
self.parts = OrderedDict([
('edAEq', 'max_revenue'),
Expand Down
Empty file.
45 changes: 34 additions & 11 deletions Programming assignment, week 3: Mean encodings/Programming_assignment_week_3.ipynb 100644 → 100755
Expand Up @@ -36,7 +36,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
Expand All @@ -55,7 +57,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sales = pd.read_csv('../readonly/final_project_data/sales_train.csv.gz')"
Expand All @@ -78,7 +82,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"index_cols = ['shop_id', 'item_id', 'date_block_num']\n",
Expand Down Expand Up @@ -131,6 +137,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
Expand Down Expand Up @@ -159,7 +166,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"'''\n",
Expand Down Expand Up @@ -187,7 +196,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"grader = Grader()"
Expand Down Expand Up @@ -226,7 +237,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# YOUR CODE GOES HERE\n",
Expand Down Expand Up @@ -265,7 +278,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# YOUR CODE GOES HERE\n",
Expand Down Expand Up @@ -299,7 +314,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# YOUR CODE GOES HERE\n",
Expand Down Expand Up @@ -335,7 +352,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# YOUR CODE GOES HERE\n",
Expand All @@ -356,7 +375,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"STUDENT_EMAIL = # EMAIL HERE\n",
Expand All @@ -367,7 +388,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"grader.submit(STUDENT_EMAIL, STUDENT_TOKEN)"
Expand Down
2 changes: 1 addition & 1 deletion Programming assignment, week 3: Mean encodings/grader.py 100644 → 100755
Expand Up @@ -21,7 +21,7 @@ def almostEqual(x, y):

class Grader(object):
def __init__(self):
self.submission_page = 'https://www.coursera.org/api/onDemandProgrammingScriptSubmissions.v1'
self.submission_page = 'https://hub.coursera-apps.org/api/onDemandProgrammingScriptSubmissions.v1'
self.assignment_key = 'JVyZjZIaEeeXtQpjLCk-0A'
self.parts = OrderedDict([
('9zPRY', 'KFold_scheme'),
Expand Down
Empty file.
2 changes: 1 addition & 1 deletion Programming assignment, week 4: Ensembles/grader.py 100644 → 100755
Expand Up @@ -21,7 +21,7 @@ def almostEqual(x, y):

class Grader(object):
def __init__(self):
self.submission_page = 'https://www.coursera.org/api/onDemandProgrammingScriptSubmissions.v1'
self.submission_page = 'https://hub.coursera-apps.org/api/onDemandProgrammingScriptSubmissions.v1'
self.assignment_key = 'Lhay-55JEeet3xIBvGMumA'
self.parts = OrderedDict([
('EyiFH', 'best_alpha'),
Expand Down
36 changes: 26 additions & 10 deletions Programming assignment, week 4: KNN features/compute_KNN_features.ipynb 100644 → 100755
Expand Up @@ -44,7 +44,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
Expand Down Expand Up @@ -87,7 +89,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"train_path = '../readonly/KNN_features_data/X.npz'\n",
Expand Down Expand Up @@ -120,7 +124,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.base import BaseEstimator, ClassifierMixin\n",
Expand Down Expand Up @@ -354,7 +360,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# a list of K in KNN, starts with one \n",
Expand All @@ -373,9 +381,9 @@
"test_knn_feats = NNF.predict(X_test[:50])\n",
"\n",
"# This should be zero\n",
"print ('Deviation from ground thruth features: %f' % np.abs(test_knn_feats - true_knn_feats_first50[44:45]).sum())\n",
"print ('Deviation from ground thruth features: %f' % np.abs(test_knn_feats - true_knn_feats_first50).sum())\n",
"\n",
"deviation =np.abs(test_knn_feats - true_knn_feats_first50[44:45]).sum(0)\n",
"deviation =np.abs(test_knn_feats - true_knn_feats_first50).sum(0)\n",
"for m in np.where(deviation > 1e-3)[0]: \n",
" p = np.where(np.array([87, 88, 117, 146, 152, 239]) > m)[0][0]\n",
" print ('There is a problem in feature %d, which is a part of section %d.' % (m, p + 1))"
Expand Down Expand Up @@ -405,7 +413,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"for metric in ['minkowski', 'cosine']:\n",
Expand Down Expand Up @@ -441,7 +451,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Differently from other homework we will not implement OOF predictions ourselves\n",
Expand Down Expand Up @@ -485,7 +497,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"s = 0\n",
Expand All @@ -509,7 +523,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from grader import Grader\n",
Expand Down
2 changes: 1 addition & 1 deletion Programming assignment, week 4: KNN features/grader.py 100644 → 100755
Expand Up @@ -21,7 +21,7 @@ def almostEqual(x, y):

class Grader(object):
def __init__(self):
self.submission_page = 'https://www.coursera.org/api/onDemandProgrammingScriptSubmissions.v1'
self.submission_page = 'https://hub.coursera-apps.org/api/onDemandProgrammingScriptSubmissions.v1'
self.assignment_key = 'r2N4iqFlEeeRFQqEddeEzg'
self.parts = OrderedDict([
('1O8kU', 'statistic')])
Expand Down
Empty file modified README.md 100644 → 100755
Empty file.
Empty file modified Reading materials/EDA_Springleaf_screencast.ipynb 100644 → 100755
Empty file.
Empty file modified Reading materials/EDA_video2.ipynb 100644 → 100755
Empty file.
Empty file modified Reading materials/EDA_video3_screencast.ipynb 100644 → 100755
Empty file.
Empty file modified Reading materials/GBM_drop_tree.ipynb 100644 → 100755
Empty file.
Empty file.
Empty file modified Reading materials/Macros.ipynb 100644 → 100755
Empty file.
Empty file.
Empty file modified Reading materials/Metrics_video3_weighted_median.ipynb 100644 → 100755
Empty file.
Empty file modified Reading materials/Metrics_video8_soft_kappa_xgboost.ipynb 100644 → 100755
Empty file.
Empty file modified readonly/KNN_features_data/X.npz 100644 → 100755
Empty file.
Empty file modified readonly/KNN_features_data/X_test.npz 100644 → 100755
Empty file.
Empty file modified readonly/KNN_features_data/Y.npy 100644 → 100755
Empty file.
Empty file modified readonly/KNN_features_data/Y_test.npy 100644 → 100755
Empty file.
Empty file modified readonly/KNN_features_data/knn_feats_test_first50.npy 100644 → 100755
Empty file.
Empty file modified readonly/data_leakages_data/test_pairs.csv 100644 → 100755
Empty file.

0 comments on commit f8f63a7

Please sign in to comment.