In [1]:
import os

# Directory holding the Markdown corpus, resolved against the current working directory.
md_dir = os.path.join(os.getcwd(), 'mds')

# os.listdir() returns entries in arbitrary order, so sort the names to keep the
# file order (and everything derived from it) stable between runs and machines.
md_filenames = [os.path.join(md_dir, filename) for filename in sorted(os.listdir(md_dir))]
print(md_filenames)

['d:\\git-nlp\\nlp-practice\\Practice-2019-03-23\\mds\\0xInfection-Awesome-WAF-master-README.md', 'd:\\git-nlp\\nlp-practice\\Practice-2019-03-23\\mds\\1N3-IntruderPayloads-master-README.md', 'd:\\git-nlp\\nlp-practice\\Practice-2019-03-23\\mds\\2B5-ia-3B5-master-module3-README.md', 'd:\\git-nlp\\nlp-practice\\Practice-2019-03-23\\mds\\6174-ppurl-master-data-112.md', 'd:\\git-nlp\\nlp-practice\\Practice-2019-03-23\\mds\\6174-ppurl-master-data-13.md', 'd:\\git-nlp\\nlp-practice\\Practice-2019-03-23\\mds\\6174-ppurl-master-data-154.md', 'd:\\git-nlp\\nlp-practice\\Practice-2019-03-23\\mds\\6174-ppurl-master-data-43.md', 'd:\\git-nlp\\nlp-practice\\Practice-2019-03-23\\mds\\6174-ppurl-master-data-53.md', 'd:\\git-nlp\\nlp-practice\\Practice-2019-03-23\\mds\\6174-ppurl-master-data-64.md', 'd:\\git-nlp\\nlp-practice\\Practice-2019-03-23\\mds\\6174-ppurl-master-data-81.md', 'd:\\git-nlp\\nlp-practice\\Practice-2019-03-23\\mds\\6174-ppurl-master-data-90.md', 'd:\\git-nlp\\nlp-practice\\Practi

In [15]:
from collections import namedtuple

# One record per non-empty Markdown line:
#   source              - the line with surrounding whitespace stripped
#   cleaned             - same as source, minus the leading '#' marker for header lines
#   line_num            - 0-based index of the line within its file
#   total_lines_in_text - number of lines in the file the line came from
#   is_header           - True for lines that start with '# ', '## ' or '### '
LineStat = namedtuple(
    'LineStat',
    ['source', 'cleaned', 'line_num', 'total_lines_in_text', 'is_header'],
)

In [17]:
# Line prefixes that mark a level 1-3 Markdown header.
HEADER_PREFIXES = ('# ', '## ', '### ')

lines = []

for md_path in md_filenames:
    # 'utf-8-sig' reads plain UTF-8 too, but drops a leading byte-order mark;
    # with plain 'utf-8' the BOM stays glued to the first line, so a file that
    # opens with '# Title' would not be recognised as starting with a header.
    with open(md_path, 'rt', encoding='utf-8-sig') as md_file:
        file_lines = md_file.read().splitlines()
    total_lines = len(file_lines)

    for line_num, raw_line in enumerate(file_lines):
        source = raw_line.strip()
        if not source:
            continue  # blank lines carry no information

        cleaned = source
        # The header test deliberately looks at the unstripped line, so an
        # indented '#' line is kept as ordinary text.
        is_header = raw_line.startswith(HEADER_PREFIXES)
        if is_header:
            _marker, separator, header_text = source.partition(' ')
            if not separator:
                # A bare marker such as '# ' collapses to '#' after stripping:
                # there is no header text, so the line is dropped entirely.
                continue
            cleaned = header_text.strip()

        lines.append(LineStat(source=source, cleaned=cleaned, line_num=line_num,
                              total_lines_in_text=total_lines, is_header=is_header))

print(len(lines))

208846


In [18]:
# Preview the first ten records, one per output line.
print(*lines[:10], sep='\n')

LineStat(source='# Awesome WAF [![Awesome](https://cdn.rawgit.com/sindresorhus/awesome/d7305f38d29fed78fa85652e3a63e154dd8e8829/media/badge.svg "Awesome")](https://github.com/0xinfection/awesome-waf)', cleaned='Awesome WAF [![Awesome](https://cdn.rawgit.com/sindresorhus/awesome/d7305f38d29fed78fa85652e3a63e154dd8e8829/media/badge.svg "Awesome")](https://github.com/0xinfection/awesome-waf)', line_num=0, total_lines_in_text=2855, is_header=True)
LineStat(source='> A curated list of awesome WAF stuff. 🔥', cleaned='> A curated list of awesome WAF stuff. 🔥', line_num=1, total_lines_in_text=2855, is_header=False)
LineStat(source='>', cleaned='>', line_num=2, total_lines_in_text=2855, is_header=False)
LineStat(source='> __Foreword:__ This was originally my own collection on WAFs. I am open-sourcing it in the hope that it will be useful for pentesters and researchers out there. You might want to keep this repo on a watch, since it will be updated regularly. "The community just learns from each

In [19]:
# Number of lines whose stripped text starts with a level-3 header marker.
print(len([stat for stat in lines if stat.source.startswith('### ')]))

5012


In [20]:
# Number of lines labelled as headers.
print(sum(1 for stat in lines if stat.is_header))

12556


In [43]:
# Divide the lines into a learn set and a test set.

from random import Random, shuffle  # `shuffle` stays imported in case other cells rely on it

SPLIT_SEED = 42        # fixed seed so the split is the same on every run
LEARN_FRACTION = 0.7   # share of the lines that goes to the learn set

# Shuffle a copy with a private, seeded generator: re-running this cell yields
# the same split, and `lines` itself keeps its original order for other cells.
shuffled_lines = list(lines)
Random(SPLIT_SEED).shuffle(shuffled_lines)

threshold = int(len(shuffled_lines) * LEARN_FRACTION)
lines_learn = shuffled_lines[:threshold]
lines_test = shuffled_lines[threshold:]
assert len(lines_learn) + len(lines_test) == len(lines)

print(f"Total lines: {len(lines)}, learn set: {len(lines_learn)}, lines_test: {len(lines_test)}")


Total lines: 208846, learn set: 146192, lines_test: 62654


In [50]:
import csv
# Raise the csv parser's maximum field size to 1,000,000 so very long lines can be read back.
csv.field_size_limit(1_000_000)

def save_as_csv(filename, data):
    """Write records to a UTF-8 CSV file, preceded by a header row.

    filename -- path of the CSV file to write; opened in 'wt' mode, so an
                existing file is overwritten
    data     -- iterable of namedtuple records; each one is turned into a
                row with `_asdict()`, using the `LineStat` field names as columns
    """
    # newline='' leaves line-ending handling to the csv module.
    with open(filename, 'wt', encoding='utf-8', newline='') as out_file:
        csv_writer = csv.DictWriter(out_file, fieldnames=LineStat._fields)
        csv_writer.writeheader()
        rows = [record._asdict() for record in data]
        csv_writer.writerows(rows)


In [51]:
# Show the column order, then write each split to its own CSV file.
print(LineStat._fields)

for csv_name, subset in (('learn.csv', lines_learn), ('test.csv', lines_test)):
    save_as_csv(csv_name, subset)

('source', 'cleaned', 'line_num', 'total_lines_in_text', 'is_header')


In [31]:
def naive_is_header(line: LineStat):
    """Baseline classifier: ignores the line and always answers that it is not a header."""
    return False

In [52]:
def short_is_header(line: 'LineStat', max_len: int = 32) -> bool:
    """Length-only heuristic: a line is a header when its cleaned text is short.

    line    -- record with a `cleaned` string attribute
    max_len -- exclusive upper bound on the length of `cleaned`; the default
               of 32 keeps the original behaviour

    Returns True when `len(line.cleaned) < max_len`, otherwise False.
    """
    # The annotation is a string so this cell can be run before LineStat is defined.
    return len(line.cleaned) < max_len

In [74]:
def check_measures(classifier, data=None, show_false_positives=True):
    """Evaluate a header classifier and print its confusion counts and scores.

    classifier           -- callable taking one record and returning a truthy
                            value when it predicts "header"
    data                 -- iterable of namedtuple records with `source` and
                            `is_header` fields; defaults to the global `lines`
    show_false_positives -- when True (the default), print the source text of
                            every false positive as it is found

    Prints one line with TP/TN/FP/FN and one with precision, recall and F1.
    Returns None.
    """
    if data is None:
        data = lines

    tp = tn = fp = fn = 0
    for record in data:
        # Hand the classifier a copy with the raw text and the true label
        # blanked out. `_replace` keeps every other field, so the copy is
        # always a complete record (building a LineStat by hand here omitted
        # `total_lines_in_text` and raised TypeError).
        masked = record._replace(source='', is_header=False)
        predicted = classifier(masked)
        if predicted:
            if record.is_header:
                tp += 1
            else:
                fp += 1
                if show_false_positives:
                    print(record.source)
        elif record.is_header:
            fn += 1
        else:
            tn += 1

    print(f"TP={tp}, TN={tn}, FP={fp}, FN={fn}")
    # The small epsilon keeps the ratios defined when a denominator would be zero.
    prec = tp / (tp+fp+1e-6)
    recall = tp / (tp+fn+1e-6)
    f1 = 2 * prec * recall /(prec + recall+1e-6)
    print(f"Precision={prec}, recall={recall}, F1={f1}")

In [70]:
# Baseline: never predicts a header, so there are no true or false positives.
check_measures(classifier=naive_is_header)

TP=0, TN=201302, FP=0, FN=7546
Precision=0.0, recall=0.0, F1=0.0


In [71]:
# Length-only heuristic.
check_measures(classifier=short_is_header)

TP=5590, TN=122296, FP=79006, FN=1956
Precision=0.06607877440935649, recall=0.7407898223243056, F1=0.12133431164657707


In [91]:
# Characters whose presence anywhere in the text rules the line out as a header.
bad_chars = set('#=./<>|(){}:[];')


def rules_classifier(line: 'LineStat'):
    """Hand-written rule set that decides whether a line looks like a header.

    line -- record with a `cleaned` string and an integer `line_num`

    Returns False when the text is empty, is more than half digits, does not
    start with an alphanumeric character, starts with a lowercase letter,
    contains any character from `bad_chars`, or is longer than 120 characters.
    Otherwise returns True when the line is one of the first two of its file,
    is shorter than 32 characters, or is title-cased; False if none of those hold.
    """
    text: str = line.cleaned
    if not text:
        # Nothing to inspect; without this guard `text[0]` raises IndexError.
        return False

    digit_count = sum(c.isdigit() for c in text)
    if digit_count * 2 > len(text):
        return False

    first_char = text[0]
    if not first_char.isalnum() or first_char.islower():
        return False
    if not bad_chars.isdisjoint(text):
        return False
    if len(text) > 120:
        return False

    # Past the vetoes, any single positive signal is enough.
    return line.line_num < 2 or len(text) < 32 or text.istitle()

In [92]:
# Hand-written rule set.
check_measures(classifier=rules_classifier)

360 Firewall
AlertLogic Firewall
Aliyundun Firewall
Anquanbao WAF
Anyu Firewall
Approach Firewall
Armor Defense
Baidu Yunjiasu
Barikode Firewall
Barracuda WAF
Better WP Security
BitNinja Firewall
Bluedon IST
BinarySec WAF
BlockDos
CDN NS Application Gateway
ChinaCache Firewall
Chuangyu WAF
Cisco ACE XML Gateway
Cloudbric Firewall
Cloudflare
Comodo Firewall
Deny-All Firewall
Distil Firewall
DoSArrest Internet Security
DynamicWeb Injection Check
FortiWeb Firewall
GoDaddy Firewall
GreyWizard Firewall
HyperGuard Firewall
IBM DataPower
ISAServer
Janusec Application Gateway
Jiasule Firewall
KnownSec Firewall
LiteSpeed Firewall
MissionControl WAF
NewDefend Firewall
NSFocus Firewall
Palo Alto Firewall
PerimeterX Firewall
Profense Firewall
Radware Appwall
Reblaze Firewall
Sabre Firewall
Safe3 Firewall
SafeDog Firewall
Secure Entry Firewall
Shadow Daemon WAF
ShieldSecurity
SiteGround Firewall
SiteLock TrueShield
Sophos UTM Firewall
SquareSpace Firewall
Sucuri CloudProxy
Tencent Cloud WAF
USP Sec

Sentiment   SentimentText
Core
Suggested approach -
Validate XML
LSTM Poetry
You said I was falling apart
I wish I were you
I wanted you to know how I feel
I could have settled it all
I can't stop this way
The stars are singing the songs
I need you to stay away
We are the spirit of the beast
The way that we started love
Let the sun shine on your face
We will hear you fall apart
MIT
文本样式用来设置文字的外观，以下是常用文本样式属性，更详细的信息请参考API文档。
设置字体的属性是 `fontFamily`
示例代码如下：
编译并运行，效果如下：
示例代码如下：
编译并运行，效果如下：
具体代码如下：
编译并运行，效果如下：
设置加粗的属性为 `bold`
设置斜体的属性为 `italic`
具体代码如下：
编译并运行，效果如下：
图1 一段样式丰富的文本
Egret 提供两种实现方案。
给一段文字定义红色、字号30样式的代码如下：
实现图1中效果的代码如下：
将得到与JSON方式设置样式类似的结果。
编译并运行，效果如下：
文本的布局分为横向和纵向两种。
横向布局可以设置文字居左，水平居中，居右。
纵向布局可以设置文字居顶，垂直居中，居底。
编译并运行，效果如下：
编译并运行，效果如下：
在使用横向布局与纵向布局时，两者并不冲突。
编译并运行，效果如下：
编译并运行，效果如下：
点击上面的有链接的文字效果如下：
OR
OR MANUALLY
Poetry
Welcome!
Gold-Standard Sentence Splitting Corpus
The published PDF
Julia$ julia
Welcome to Adventure Game Studio! This new Windows Help version of the
Copyright and term

Who owns the zebra?
Playing Atari games
You will also learn basics of
Did it find any bugs so far?
Running 4 workers
Did you see the differentce?
What is the bug?
Did it crash? How?
When combined with
Thinking about ASSERTS
Koan "ASSERT-TRUE"
CLASSES  PROBABILITIES
Examples
Setup
To Use
Rename a data object\n"
YIELD result
RETURN result
YIELD result
RETURN result
YIELD result RETURN result
YIELD result
RETURN result
RETURN result
YIELD result return result
RETURN result
And retrieve it with
YIELD result
RETURN result
GraphAware NLP Using StanfordNLP
Getting the Software
Introduction and How-To
RETURN n, result
The O
Apollo  MISSION
1   MISSION
NASA    O
April   O
24  O
1967    O
RMI client
Usage
Where params may contain
Configuration
String num1
String num2
String result
Testing
Next is the test
Tool Support
Usage
Configuration
String num1
String num2
String result
Testing
Next is the test
Tool Support
Motion AI Ruby SDK
Project Repo
Motion AI Docs
Smart Mirror with a smart AI 🤖
Verify

Update your dependencies
NULL
NULL
MIT
Installation
Usage
Configuration
Buka terminal dan tulis
Berikut outputnya
Cuekin aku Kak!
Iya cuekin aku
Gpp kok cuekin aku!
That's right
Ekspresi Subtitusi
Halo Desty
Buat kopi ukuran sedang
Mesin sedang diservis
Mesin sedang diservis
Selamat siang
Ida
Benyamin
Slamet
Safi'i
Ida
Benyamin
Slamet
Safi'i
Ida adalah seorang Penyanyi
Benyamin adalah seorang Pelawak
Slamet adalah seorang Pelukis
Safi'i adalah seorang Penyair
Halo! Selamat belajar Ruby
Luas Segiempat adalah 20
Halo dunia!
Proc
Proc pertama
Proc kedua
Proc
Proc
Halo Dunia!
Outputnya tidak ada sama sekali
Avanza melaju ke depan
Avanza rem
Avanza mundur ke belakang
Avanza melaju ke depan
Avanza rem
Avanza mundur ke belakang
TO
TO
TIGERBLOOD
WINNING
INTERSECTION
UNION
EXAMPLES
In general,
Copyright 2009, 2010 Kristopher Michael Kowal
Where
Where
Where
Where
Where
Where
Where
Where
Where
Where
Where
Where
Where
Thanks,
Lisa
Where `x` is a string
To install these packages with
Run tests with

Training and Testing
AI
Simple State Machine
Walking,
Idle
State Machine
Behavior Trees
Utility Based AI
Random Python scripts for data visualization
Alternative Array
MonadPlus Array
MonadZero Array
Alt Array
Plus Array
Bifunctor Either
Bifoldable Either
Bitraversable Either
Functor List
Foldable List
Unfoldable List
Traversable List
Apply List
Applicative List
Bind List
Monad List
Alt List
Plus List
Alternative List
MonadZero List
MonadPlus List
Extend List
For example
For example,
For example,
Get the length of a list
For example,
Functor Maybe
Apply Maybe
Applicative Maybe
Alt Maybe
Plus Maybe
Alternative Maybe
Bind Maybe
Monad Maybe
MonadZero Maybe
Extend Maybe
Invariant Maybe
Monoid Unit
Monoid String
Foldable Array
Foldable Maybe
Foldable First
Foldable Last
Foldable Additive
Foldable Dual
Foldable Disj
Foldable Conj
Foldable Multiplicative
Traversable Array
Traversable Maybe
Traversable First
Traversable Last
Traversable Additive
Traversable Dual
Traversable Conj
Traversable Di

参考吴翰清的《白帽子讲Web安全》一书。
执行以下命令启动s2-001测试环境
执行任意命令POC（无回显，空格用`@`代替）：
期待大佬研究出有回显的POC。
2 0xf7fa35a0
8 0xf7fa33dc
9 0xffffd360
50 0xffffd3ec
53 0xffffd57f
55 0xffffd5a0
56 0xffffdb5c
57 0xffffdb73
58 0xffffdb82
59 0xffffdb93
60 0xffffdba8
61 0xffffdbb3
62 0xffffdbc8
63 0xffffdbdc
64 0xffffdbea
65 0xffffdbf5
66 0xffffdc1b
67 0xffffdc2c
68 0xffffdc36
Run calibration target service
Edit the launch file
Run calibration target service
Edit the launch file
Edit the launch file
Start roscore and sync times
Arguments
Keys
Calibrate z axis
Calibrate y axis
Calibrate x axis
Button Mapings
Button Mapings
Y - Stop Manipulation Pipeline
Back - Go home
Xbox Button - AUTO
MOVEMENT
TESTING
TRAJECTORY HANDLING
DEBUGGING
EXPERIENCE PLANNING
Start fake controllers
Rviz Visualizer
Load meshes
See what is connected
See what is connected
See what is connected
Change device ID
Test camera view
Visualizer
Gelsight
0 Do nothing
2 Go home
3 Insertion task
4 Gelsight touch control
10 Automated Insertion Test
41 Get SRD