Skip to content

Commit

Permalink
update code examples
Browse files (browse the repository at this point in the history)
  • Loading branch information
amaiya committed May 20, 2022
1 parent 7d571cd commit 5fc3f11
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 99 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Most recent releases are shown at the top. Each release shows:
- **Changed**: Additional parameters, changes to inputs or outputs, etc
- **Fixed**: Bug fixes that don't change documented behaviour

-## 0.31.2 (TBD)
+## 0.31.2 (2022-05-20)

### new:
- N/A
Expand Down
20 changes: 10 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,16 @@ kwe = KeywordExtractor()
kwe.extract_keywords(text, candidate_generator='noun_phrases')

# OUTPUT
#[('machine learning', 0.0784313725490196),
# ('text classification', 0.049019607843137254),
# ('image classification', 0.049019607843137254),
# ('exact answers', 0.0392156862745098),
# ('augmented machine learning', 0.0392156862745098),
# ('graph data', 0.029411764705882353),
# ('node classification', 0.029411764705882353),
# ('entity recognition', 0.029411764705882353),
# ('code example', 0.029411764705882353),
# ('index documents', 0.029411764705882353)]
[('machine learning', 0.5341716824761019),
('augmented machine learning', 0.5208544167057394),
('text classification', 0.5134074336523509),
('image classification', 0.5071170746851726),
('node classification', 0.4973034499292447),
('tabular data', 0.49645958463369566),
('entity recognition', 0.45195059648705926),
('exact answers', 0.4462502183477142),
('import ktrain', 0.32891369271775894),
('load model', 0.32052348289886556)]
```
- **2022-01-28**
- **ktrain v0.29.x** is released and includes miscellaneous enhancements contributed by [Sandy Khosasi](https://github.com/ilos-vigil) such as [support for MobileNetV3 and EfficientNet](https://colab.research.google.com/drive/1EJHpMVG6fBCg33UPla_Ly_6LQdswU2Ur?usp=sharing), [plotting improvements](https://colab.research.google.com/drive/1_WaRQ0J4g0VTn6HWS3kszdFZbBBWoa7R?usp=sharing), and [raw confidence scores in QA](https://colab.research.google.com/drive/1ParprLN9hFX6cxJ1w7bv91PYx4o0J1zm?usp=sharing).
Expand Down
184 changes: 97 additions & 87 deletions examples/text/keyword_extraction_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -60,7 +60,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"# of words in downloaded paper: 4551\n"
"# of words in downloaded paper: 4316\n"
]
}
],
Expand Down Expand Up @@ -95,23 +95,23 @@
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 396 ms, sys: 19.8 ms, total: 416 ms\n",
"Wall time: 415 ms\n"
"CPU times: user 341 ms, sys: 16.9 ms, total: 358 ms\n",
"Wall time: 357 ms\n"
]
},
{
"data": {
"text/plain": [
"[('machine learning', 0.10548523206751055),\n",
" ('step', 0.06751054852320675),\n",
" ('learning rate', 0.046413502109704644),\n",
" ('arxiv preprint', 0.046413502109704644),\n",
" ('text classification', 0.03375527426160337),\n",
" ('augmented machine', 0.02531645569620253),\n",
" ('open-domain question-answering', 0.02531645569620253),\n",
" ('augmented machine learning', 0.02531645569620253),\n",
" ('bert', 0.02109704641350211),\n",
" ('low-code library', 0.02109704641350211)]"
"[('machine learning', 0.5503444817814314),\n",
" ('augmented machine', 0.5123881190828152),\n",
" ('augmented machine learning', 0.5123881190828152),\n",
" ('low-code library', 0.5107922072149182),\n",
" ('step', 0.5092460272048237),\n",
" ('text classification', 0.5044526957819503),\n",
" ('open-domain question-answering', 0.4996712653266335),\n",
" ('learning rate', 0.4894264238049616),\n",
" ('bert', 0.424790141017796),\n",
" ('arxiv preprint', 0.16264098705836771)]"
]
},
"execution_count": 6,
Expand Down Expand Up @@ -143,23 +143,23 @@
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.04 s, sys: 0 ns, total: 1.04 s\n",
"Wall time: 1.04 s\n"
"CPU times: user 855 ms, sys: 103 µs, total: 856 ms\n",
"Wall time: 855 ms\n"
]
},
{
"data": {
"text/plain": [
"[('machine learning', 0.0784313725490196),\n",
" ('text classification', 0.049019607843137254),\n",
" ('image classification', 0.049019607843137254),\n",
" ('exact answers', 0.0392156862745098),\n",
" ('augmented machine learning', 0.0392156862745098),\n",
" ('graph data', 0.029411764705882353),\n",
" ('node classification', 0.029411764705882353),\n",
" ('entity recognition', 0.029411764705882353),\n",
" ('code example', 0.029411764705882353),\n",
" ('index documents', 0.029411764705882353)]"
"[('machine learning', 0.5341716824761019),\n",
" ('augmented machine learning', 0.5208544167057394),\n",
" ('text classification', 0.5134074336523509),\n",
" ('image classification', 0.5071170746851726),\n",
" ('node classification', 0.4973034499292447),\n",
" ('tabular data', 0.49645958463369566),\n",
" ('entity recognition', 0.45195059648705926),\n",
" ('exact answers', 0.4462502183477142),\n",
" ('import ktrain', 0.32891369271775894),\n",
" ('load model', 0.32052348289886556)]"
]
},
"execution_count": 8,
Expand Down Expand Up @@ -188,16 +188,16 @@
{
"data": {
"text/plain": [
"[('augmented machine learning', 0.07017543859649122),\n",
" ('a. s. maiya', 0.05263157894736842),\n",
" ('optimal learning rate', 0.03508771929824561),\n",
" ('natural language questions', 0.03508771929824561),\n",
" ('support text data', 0.017543859649122806),\n",
" ('learning rate schedules', 0.017543859649122806),\n",
" ('machine learning model', 0.017543859649122806),\n",
" ('unsupervised topic modeling', 0.017543859649122806),\n",
" ('large text corpus', 0.017543859649122806),\n",
" ('social media accounts', 0.017543859649122806)]"
"[('augmented machine learning', 0.541435342459079),\n",
" ('machine learning model', 0.4982195592681719),\n",
" ('support text data', 0.49549171563837363),\n",
" ('learning rate schedules', 0.47765279578595193),\n",
" ('a. s. maiya', 0.4612715229636928),\n",
" ('unsupervised topic modeling', 0.44648865417358047),\n",
" ('large text corpus', 0.4374416332143215),\n",
" ('optimal learning rate', 0.42667304584617965),\n",
" ('non-supervised ml tasks', 0.2330746472277638),\n",
" ('natural language questions', 0.21662908635171388)]"
]
},
"execution_count": 9,
Expand All @@ -224,16 +224,16 @@
{
"data": {
"text/plain": [
"[('machine learning', 0.0784313725490196),\n",
" ('text classification', 0.049019607843137254),\n",
" ('image classification', 0.049019607843137254),\n",
" ('exact answers', 0.0392156862745098),\n",
" ('augmented machine learning', 0.0392156862745098),\n",
" ('graph data', 0.029411764705882353),\n",
" ('node classification', 0.029411764705882353),\n",
" ('entity recognition', 0.029411764705882353),\n",
" ('code example', 0.029411764705882353),\n",
" ('index documents', 0.029411764705882353)]"
"[('machine learning', 0.5341716824761019),\n",
" ('augmented machine learning', 0.5208544167057394),\n",
" ('text classification', 0.5134074336523509),\n",
" ('image classification', 0.5071170746851726),\n",
" ('node classification', 0.4973034499292447),\n",
" ('tabular data', 0.49645958463369566),\n",
" ('entity recognition', 0.45195059648705926),\n",
" ('exact answers', 0.4462502183477142),\n",
" ('import ktrain', 0.32891369271775894),\n",
" ('load model', 0.32052348289886556)]"
]
},
"execution_count": 10,
Expand Down Expand Up @@ -263,25 +263,35 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Building prefix dict from the default dictionary ...\n",
"Loading model from cache /tmp/jieba.cache\n",
"Loading model cost 0.669 seconds.\n",
"Prefix dict has been built successfully.\n"
]
},
{
"data": {
"text/plain": [
"[('监督 学习', 0.06),\n",
" ('训练 数据', 0.06),\n",
" ('学习 算法', 0.04),\n",
" ('机器 学习', 0.02),\n",
" ('学习 任务', 0.02),\n",
" ('样本 输入', 0.02),\n",
" ('输入 输出', 0.02),\n",
" ('输入 映射', 0.02),\n",
" ('自由 一组', 0.02),\n",
" ('一组 训练', 0.02)]"
"[('监督 学习', 0.53),\n",
" ('机器 学习', 0.48103658536585364),\n",
" ('学习 任务', 0.4764634146341463),\n",
" ('样本 输入', 0.4627439024390244),\n",
" ('输入 映射', 0.4398780487804878),\n",
" ('自由 一组', 0.39719512195121953),\n",
" ('一组 训练', 0.3926219512195122),\n",
" ('训练 数据', 0.38670731707317074),\n",
" ('学习 算法', 0.22731707317073171),\n",
" ('输入 输出', 0.01152439024390244)]"
]
},
"execution_count": 12,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -312,25 +322,25 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(\"données d'entraînement\", 0.0392156862745098),\n",
" (\"l'apprentissage supervisé\", 0.0196078431372549),\n",
" (\"tâche d'apprentissage\", 0.0196078431372549),\n",
" (\"d'apprentissage automatique\", 0.0196078431372549),\n",
" ('automatique consistant', 0.0196078431372549),\n",
" (\"base d'exemples\", 0.0196078431372549),\n",
" ('paires entrée-sortie', 0.0196078431372549),\n",
" (\"d'entraînement étiquetées\", 0.0196078431372549),\n",
" ('étiquetées constituées', 0.0196078431372549),\n",
" (\"constituées d'un\", 0.0196078431372549)]"
"[(\"l'apprentissage supervisé\", 0.5098039215686274),\n",
" (\"tâche d'apprentissage\", 0.4928634698232476),\n",
" (\"d'apprentissage automatique\", 0.489783387687724),\n",
" ('automatique consistant', 0.4815698353263277),\n",
" (\"base d'exemples\", 0.43588195031606075),\n",
" ('paires entrée-sortie', 0.4261283568869026),\n",
" (\"données d'entraînement\", 0.4051314571002939),\n",
" (\"d'entraînement étiquetées\", 0.39122075935096834),\n",
" ('étiquetées constituées', 0.3835205540121593),\n",
" (\"constituées d'un\", 0.37787373676369934)]"
]
},
"execution_count": 13,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -360,7 +370,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 13,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -410,7 +420,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -440,15 +450,15 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 2.19 s, sys: 225 ms, total: 2.42 s\n",
"Wall time: 9.51 s\n"
"CPU times: user 3.94 s, sys: 95 ms, total: 4.04 s\n",
"Wall time: 9.36 s\n"
]
}
],
Expand All @@ -460,7 +470,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 17,
"metadata": {},
"outputs": [
{
Expand All @@ -473,19 +483,19 @@
{
"data": {
"text/plain": [
"[('supervised learning', 0.07317073170731707),\n",
" ('training data', 0.07317073170731707),\n",
" ('learning algorithm', 0.04878048780487805),\n",
" ('machine learning', 0.024390243902439025),\n",
" ('learning task', 0.024390243902439025),\n",
" ('output based', 0.024390243902439025),\n",
" ('example input-output', 0.024390243902439025),\n",
" ('input-output pairs', 0.024390243902439025),\n",
" ('labeled training', 0.024390243902439025),\n",
" ('data consisting', 0.024390243902439025)]"
"[('supervised learning', 0.5357142857142857),\n",
" ('machine learning', 0.4946192305347235),\n",
" ('learning task', 0.4894975916102677),\n",
" ('output based', 0.44980488994573503),\n",
" ('example input-output', 0.4395616120968234),\n",
" ('input-output pairs', 0.43443997317236754),\n",
" ('training data', 0.4236784342418145),\n",
" ('labeled training', 0.40499054935674655),\n",
" ('data consisting', 0.3941070666422779),\n",
" ('learning algorithm', 0.2632461435278337)]"
]
},
"execution_count": 20,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
1 change: 0 additions & 1 deletion ktrain/text/kw/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ def extract_keywords(
else:
noun_phrases = blob.noun_phrases
for np in noun_phrases:
print(np)
words = np.split()
n = len(words)
if n not in ngram_lens:
Expand Down

0 comments on commit 5fc3f11

Please sign in to comment.