From 734b49997a837c6d22e15e20670f15a113f0e13b Mon Sep 17 00:00:00 2001 From: Ishaan Shah <70190533+ishaan812@users.noreply.github.com> Date: Tue, 15 Aug 2023 06:35:56 +0530 Subject: [PATCH] FIX: Fixed Seed Script --- scripts/assets/organisations.txt | 1001 ++++++++++++++++++++++++++++++ scripts/assets/repositories.txt | 1 + scripts/gitstar_ranking_orgs.py | 53 ++ scripts/seed_script.py | 25 +- src/app.ts | 7 +- src/searchtools/search.ts | 2 +- src/searchtools/searchutils.ts | 18 +- 7 files changed, 1089 insertions(+), 18 deletions(-) create mode 100644 scripts/assets/organisations.txt create mode 100644 scripts/assets/repositories.txt create mode 100644 scripts/gitstar_ranking_orgs.py diff --git a/scripts/assets/organisations.txt b/scripts/assets/organisations.txt new file mode 100644 index 0000000..7d5cf3f --- /dev/null +++ b/scripts/assets/organisations.txt @@ -0,0 +1,1001 @@ +APIs.guru +microsoft +google +apache +facebook +alibaba +facebookresearch +vuejs +Tencent +tensorflow +freeCodeCamp +github +openai +airbnb +facebookarchive +dotnet +fossasia +vercel +TheAlgorithms +symfony +EbookFoundation +angular +square +hashicorp +rust-lang +mozilla +huggingface +kubernetes +apple +shadowsocks +golang +flutter +Netflix +twbs +nodejs +elastic +twitter +aws +NVIDIA +laravel +spring-projects +Azure +JetBrains +ossu +PacktPublishing +electron +spatie +pytorch +ant-design +ohmyzsh +ethereum +30-seconds +GoogleCloudPlatform +docker +ReactiveX +h5bp +reduxjs +redis +aws-samples +Shopify +grafana +Unity-Technologies +awslabs +thoughtbot +doocs +d3 +deepmind +ytdl-org +denoland +doctrine +expressjs +Automattic +ionic-team +Homebrew +jobbole +tc39 +cloudflare +ElemeFE +NVlabs +rails +gothinkster +googlearchive +uber +ansible +pallets +reactjs +apachecn +GoogleChrome +opencv +Genymobile +socketio +mapbox +mui +spotify +prometheus +jquery +webpack +didi +FormidableLabs +grpc +firebase +jenkinsci +storybookjs +nuxt +Yalantis +puppeteer +bilibili +django +open-mmlab +fastai +atom +moby +OWASP +python +Kong +jgraph +udacity +godotengine +home-assistant +graphql +nestjs +FortAwesome +neovim +tauri-apps +Ramotion +bitcoin +keras-team +babel +mongodb +getsentry +gin-gonic +papers-we-love +plotly +Hack-with-Github +mtdvio +segmentio +gohugoio +akveo +datawhalechina +bytedance +serverless +vitejs +prettier +codrops +thunlp +pypa +thephpleague +ipfs +Kotlin +IBM +signalapp +Semantic-Org +adobe +chartjs +encode +baidu +moment +netdata +swagger-api +OpnTec +realm +pingcap +adafruit +gatsbyjs +strapi +linkedin +zsh-users +php-fig +prisma +styled-components +PowerShell +postcss +dair-ai +cypress-io +jekyll +dcloudio +expo +resume +lodash +tesseract-ocr +dmlc +Meituan-Dianping +minio +TryGhost +ValveSoftware +eclipse +oracle +koajs +scrapy +you-dont-need +HubSpot +rstudio +syncthing +salesforce +projectdiscovery +dropbox +influxdata +auth0 +containers +w3c +caddyserver +yahoo +scikit-learn +rancher +angular-ui +kodecocodes +mdn +mobxjs +traefik +commaai +obsproject +hoppscotch +mattermost +marmelab +KhronosGroup +aquasecurity +espressif +youzan +localstack +meteor +git +x64dbg +JuliaLang +dwyl +yarnpkg +trailofbits +realpython +facebookincubator +openresty +discourse +ecomfe +npm +adobe-fonts +videojs +pixijs +webtorrent +mastodon +Leaflet +composer +infinitered +gorilla +twitter-archive +nccgroup +openstack +NationalSecurityAgency +fex-team +palantir +DefinitelyTyped +tldr-pages +WordPress +hapijs +jupyter +RocketChat +frappe +Alamofire +rapid7 +xitu +wix +fastlane +nytimes +basecamp +gogs +confluentinc +iview +digitalocean +Azure-Samples +termux +hexojs +pyenv +php +aspnet +nwjs +servo +nasa +allenai +yeoman +futurice +guzzle +metafizzy +sequelize +Qihoo360 +Yelp +kriasoft +heroku +ory +gulpjs +AFNetworking +WebAssembly +mandiant +istio +raspberrypi +bazelbuild +powerline +rubocop +atlassian +clojure +parse-community +tastejs +huginn +go-gitea +pandas-dev +filamentgroup +isocpp +getlantern +ruby +rethinkdb +SnapKit +quilljs +impress +nektos +hyperledger +lewagon +SeleniumHQ +jitsi +httpie +FreeCodeCampChina +layui +SheetJS +id-Software +exacity +FFmpeg +xamarin +bumptech +cockroachdb +beego +ClickHouse +Polymer +standard +googlecreativelab +docker-library +GitbookIO +zalando +sass +codepath +open-guides +algolia +rollup +ConsenSys +AlloyTeam +BVLC +linuxserver +mybatis +rabbitmq +pinterest +spring-attic +ray-project +xmartlabs +brave +metabase +vim +PyCQA +aliyun +mitmproxy +cli +sourcegraph +QSCTech +request +yiisoft +PHPOffice +XX-net +tmux-plugins +netty +emberjs +balderdashy +nats-io +phpDocumentor +quantopian +NativeScript +usablica +googlesamples +odoo +matplotlib +processing +react-boilerplate +iovisor +nylas +zxing +aio-libs +NixOS +cookiecutter +codemirror +openshift +AdguardTeam +naptha +jhipster +stripe +mono +naver +zeromq +gofiber +slackhq +geekcompany +paypal +kivy +CMU-Perceptual-Computing-Lab +weaveworks +vapor +swc-project +certbot +appium +phoenixframework +dart-lang +matrix-org +tmux +cocos2d +ethz-asl +icsharpcode +CocoaPods +gruntjs +envoyproxy +browserify +labstack +ajaxorg +trustedsec +stanfordnlp +celery +ReactiveCocoa +elsewhencode +dokku +postmanlabs +GetStream +redox-os +balena-io +learn-anything +databricks +wbkd +DataDog +Esri +spring-cloud +instillai +libgdx +primer +XiaoMi +pytest-dev +sqlmapproject +electronicarts +bbc +eslint +transloadit +arduino +OAI +gitlabhq +parallax +TypeStrong +coreos +nsqio +http-party +Jam3 +antlr +typelevel +cheeriojs +openwrt +select2 +OfficeDev +mozilla-mobile +Modernizr +thx +BabylonJS +react-component +Flipboard +roots +dbcli +ossrs +boostorg +tqdm +go-kit +elixir-lang +ShareX +numpy +BishopFox +react-bootstrap +railsware +matomo-org +sensepost +ramda +zendesk +fluent +chromium +yandex +visionmedia +cmderdev +keystonejs +paragonie +nginx +hammerjs +seemoo-lab +scrapinghub +Khan +i18next +getredash +datasciencemasters +harvesthq +MostlyAdequate +facebookexperimental +gradle +deezer +pugjs +ampproject +mpv-player +hyperoslo +matterport +VundleVim +vega +SwiftyJSON +redux-saga +aosp-mirror +CodeHubApp +stretchr +hybridgroup +localForage +jupyterlab +Reactive-Extensions +radareorg +uNetworking +VerbalExpressions +froala +ziglang +zulip +openssl +qbittorrent +telegramdesktop +ropensci +mochajs +handsontable +winstonjs +highlightjs +knownsec +discord +strongloop +gliderlabs +fish-shell +libuv +avajs +etsy +aseprite +swoole +requirejs +jumpserver +outline +cucumber +bloomberg +gfwlist +loklak +taosdata +openzipkin +vmware +gravitational +scala +bitcoinbook +ftlabs +phusion +websockets +ipython +reactnativecn +FasterXML +OptimalBits +hyperium +CyanogenMod +llvm +restic +wechaty +keycloak +svg +semantic-release +fullcalendar +notepad-plus-plus +tornadoweb +nodemailer +Hacker0x01 +locustio +scylladb +SecWiki +input-output-hk +RaRe-Technologies +FrontendMasters +shieldfy +osquery +Masterminds +FallibleInc +css-modules +badges +git-tips +netlify +crystal-lang +haskell +js-cookie +googlemaps +woocommerce +MetaMask +amfe +zendframework +android-cn +rbenv +Instagram +vueuse +libretro +playframework +flarum +reactiveui +SonarSource +fbsamples +micropython +h2oai +forkingdog +liriliri +libgit2 +surrealdb +node-red +lensterxyz +nteract +CreateJS +ntop +jsdom +toptal +KnpLabs +akka +SpiderLabs +GNOME +awsdocs +nim-lang +zhihu +exercism +primefaces +lingochamp +dianping +cesanta +ceph +mailgun +NetEase +mit-pdos +TeamStuQ +less +fivethirtyeight +PHPMailer +framer +SAP +octokit +puppetlabs +webrtc +Xilinx +xbmc +fsprojects +phonegap +sbt +vim-airline +opencontainers +cakephp +twilio +plasma-umass +bokeh +bcit-ci +pittcsc +wekan +EnterpriseQualityCoding +mojs +StackExchange +NVIDIAGameWorks +uikit +pld-linux +appbaseio +wikimedia +nfl +lyft +toml-lang +interagent +sqlitebrowser +99designs +chef +jupyterhub +mysqljs +esp8266 +DevExpress +travis-ci +jasmine +seaweedfs +MicrosoftLearning +eBay +asciinema +textmate +knex +owncloud +aurelia +avast +collectiveidea +pouchdb +cisco +FriendsOfPHP +stripe-archive +amazeui +guardian +citusdata +megvii-research +openstf +mockito +dropzone +SublimeText +exyte +gfx-rs +markdown-it +googlefonts +designmodo +statsd +amfoss +tencentyun +slackapi +msgpack +nodeca +phacility +MarlinFirmware +hubotio +workshopper +phalcon +karma-runner +source-foundry +allegro +nette +kubesphere +uzh-rpg +apiaryio +LMAX-Exchange +sparkfun +getgrav +fmtlib +intuit +mkdocs +commitizen +so-fancy +neo4j +wechat-miniprogram +capistrano +grab +dropwizard +NLPchina +infernojs +dutchcoders +kickstarter +magento +greensock +huawei-noah +limetext +reactphp +adobe-webplatform +feathersjs +PerfectlySoft +frida +cloudfoundry +videolan +boto +ppy +seatgeek +pagekit +tinymce +riot +corkami +ARM-software +senchalabs +ether +arangodb +guardianproject +oxford-cs-deepnlp-2017 +yabwe +fashiontec +rime +keybase +dolthub +bower +CosmicMind +OpenVPN +laracasts +ckeditor +prestodb +FreeCAD +cmusatyalab +apex +basho +qiniu +junit-team +IdentityServer +OpenEmu +bitpay +PistonDevelopers +codecentric +IntelLabs +CopyTranslator +pillarjs +Carthage +UKPLab +pre-commit +nginxinc +sinatra +braziljs +amzn +IFTTT +konvajs +mysql +foreversd +douban +mutualmobile +saltstack +xtermjs +whatwg +slimphp +getpelican +artsy +peers +dogecoin +systemd +haiwen +bitly +torch +postlight +eclipse-vertx +opendilab +automl +Moya +googleworkspace +Quick +EsotericSoftware +uutils +ServiceStack +cayleygraph +NUKnightLab +Kitware +aspnetboilerplate +polybar +scotch-io +Intervention +Kozea +venmo +leanote +guard +keen +fabric +MiCode +brix +NodeBB +MithrilJS +reactioncommerce +docopt +ffffffff0x +swimlane +tighten +containrrr +OCA +waditu +chaitin +marko-js +paperjs +BrowserSync +node-inspector +pili-engineering +MyCATApache +OpenRA +readthedocs +jsdoc +spree +apereo +koreader +bytedeco +zerotier +boltdb +linuxmint +18F +java-decompiler +h2o +Bash-it +zaproxy +vimeo +reactor +progit +duckduckgo +CachetHQ +bpmn-io +AudioKit +diaspora +nltk +ajv-validator +valor-software +jquery-archive +jpush +krakenjs +wkhtmltopdf +Tonejs +revel +spring-guides +box +showdownjs +flatiron +systemjs +KeepSafe +summernote +xiph +gollum +go-sql-driver +cosmos +codota +activeadmin +oh-my-fish +kiegroup +badoo +10up +pkg +spinnaker +erlang +stylus +FriendsOfSymfony +willowtreeapps +probml +couchbase +CocoaLumberjack +selectize +AI4Finance-Foundation +nodesource +HazyResearch +micro +softwaremill +coinbase +reactos +kaldi-asr +jikexueyuanwiki +openlayers +resque +networkx +LightTable +alicevision +RPCS3 +stylelint +linnovate +moonlight-stream +tumblr +janestreet +ruby-grape +objcio +mathjax +HandBrake +memcached +purescript +uxsolutions +SFTtech +zzzprojects +gskinnerTeam +webview +dompdf +trailblazer +cujojs +duo-labs +RailsApps +voicerepublic +gopherjs +flatlogic +Netflix-Skunkworks +PrismJS +php-http +SVProgressHUD +popcorn-official +ocaml +vipshop +yhat +rubygems +rspec +sous-chefs +youtube +postgres +overleaf +chocolatey +howdyai +mailhog +cytoscape +App-vNext +Karumi +rack +thecodingmachine +CSAILVision +asciidoctor +real-logic +SpartnerNL +cyclejs +MicrosoftEdge +dataarts +Financial-Times +ovh +StylishThemes +mail-in-a-box +CppCon +robotframework +platformio +OpenHFT +fantasyland +HabitRPG +zio +OpenRCT2 +tmuxinator +mozilla-services +angr +WebKit +ONLYOFFICE +SoftEtherVPN +CodeSeven +sympy +internetarchive +unity3d-jp diff --git a/scripts/assets/repositories.txt b/scripts/assets/repositories.txt new file mode 100644 index 0000000..aa72999 --- /dev/null +++ b/scripts/assets/repositories.txt @@ -0,0 +1 @@ +APIs-guru/openapi-directory \ No newline at end of file diff --git a/scripts/gitstar_ranking_orgs.py b/scripts/gitstar_ranking_orgs.py new file mode 100644 index 0000000..29c090e --- /dev/null +++ b/scripts/gitstar_ranking_orgs.py @@ -0,0 +1,53 @@ +import requests +from bs4 import BeautifulSoup +import os + +def remove_element_at_index(array, index): + if index < 0 or index >= len(array): + raise IndexError("Index out of range") + + new_array = [] + for i in range(len(array)): + if i != index: + new_array.append(array[i]) + + return new_array + +def get_orgs_from_page(url): + # Send an HTTP GET request to the webpage + response = requests.get(url) + organisations= [] + # Check if the request was successful + if response.status_code == 200: + # Parse the HTML content using BeautifulSoup + soup = BeautifulSoup(response.content, "html.parser") + + # Find elements using CSS selectors and extract data + for span in soup.find_all("span"): + if "hidden-xs" in span["class"]: + organisations.append(span.text.strip()) + organisations = remove_element_at_index(organisations, 0) + return organisations + + else: + print(f"Request failed with status code: {response.status_code}") + return None + +all_orgs = [] +# URL of the webpage you want to scrape +for i in range(1, 11): + url = f"https://gitstar-ranking.com/organizations?page={i}" + orgs = get_orgs_from_page(url) + all_orgs.extend(orgs) + print(f"Total organizations: {len(all_orgs)}") +print(all_orgs) +output_directory = "scripts/assets" +os.makedirs(output_directory, exist_ok=True) +output_file = os.path.join(output_directory, "organisations.txt") +with open(output_file, "w") as file: + for org in all_orgs: + file.write(org + "\n") + + + + diff --git a/scripts/seed_script.py b/scripts/seed_script.py index 91b0bd5..3bd3d98 100644 --- a/scripts/seed_script.py +++ b/scripts/seed_script.py @@ -4,12 +4,12 @@ import requests -def call_local_endpoint(prompt): - #TODO: Change this to the correct URL when activesearch endpoint is changed - url = f'http://localhost:8080/database?rootquery="{prompt}"' +def call_local_endpoint(query): + url = f'http://localhost:8080/openapi?{query}' + print(f"Calling {url}") try: - response = requests.get(url) + response = requests.post(url) # Check if the response was successful (status code 200) if response.status_code == 200: @@ -22,10 +22,19 @@ def call_local_endpoint(prompt): except requests.exceptions.RequestException as e: print(f"An error occurred: {e}") +def loadbyorganisations(filename): + orgs_array = [] + with open(filename, 'r') as txtfile: + for line in txtfile: + orgs_array.append(line.strip()) + for org in orgs_array: + query = f"org={org}" + call_local_endpoint(query) + + if __name__ == "__main__": #Get Open API files - call_local_endpoint('openapi: 3') - #Get Swagger files - # call_local_endpoint('"swagger: \"2"') + loadbyorganisations("scripts/assets/organisations.txt") -#PS: Takes a long time to run \ No newline at end of file +#Have to load Swagger Files too +#Load by Repository diff --git a/src/app.ts b/src/app.ts index d567ce5..c5bc0d9 100644 --- a/src/app.ts +++ b/src/app.ts @@ -45,7 +45,7 @@ app.get('/search', async (_req, _res) => { _res.send(results); }); -//openapi2db +//other names: openapi2db app.post('/openapi', async (_req, _res) => { const Repository = _req.query.repo as string; const Organisation = _req.query.org as string; @@ -73,7 +73,10 @@ app.use('/ping', async (_req, _res) => { }); app.get('/', (_req, _res) => { - _res.send('TypeScript With Express'); + const Query = _req.query.q as string; + octokit.rest.search.code({ q: '"openapi: 3"', per_page: 100}).then((response) => { + _res.send(response); + }); }); diff --git a/src/searchtools/search.ts b/src/searchtools/search.ts index 3f71b81..3ccadca 100644 --- a/src/searchtools/search.ts +++ b/src/searchtools/search.ts @@ -28,6 +28,7 @@ export async function activeSearch( per_page: 100, }, (response: any) => { + console.log(response) files = files.concat(response.data); if (files.length >= 200) { processCount++; @@ -94,7 +95,6 @@ export async function passiveSearch( if (result.hits.hits) { if (result.hits.hits.length === 0) { console.error('No results found in the database'); - // activeSearch(query, "", "", "", esClient); } return result.hits.hits; } diff --git a/src/searchtools/searchutils.ts b/src/searchtools/searchutils.ts index 8e0c30e..eb85e2b 100644 --- a/src/searchtools/searchutils.ts +++ b/src/searchtools/searchutils.ts @@ -44,7 +44,8 @@ export async function queryBuilder( if (prompt == undefined) { prompt = ''; } - let query; + let query : string; + let filter : string; if (rootquery != undefined) { if (rootquery === 'openapi') { query = 'openapi: 3' @@ -53,16 +54,19 @@ export async function queryBuilder( } return query; } - query = prompt + ' AND "openapi: 3"'; - // query+= prompt + ' AND "swagger: \\"2"' if (repo != undefined) { - query += '+repo:' + repo; + filter = '+repo:' + repo + ' '; } else if (organisation != undefined) { - query += '+org:' + organisation; + filter = 'org:' + organisation + ' '; } else if (username != undefined) { - query += '+user:' + username; + filter += '+user:' + username + ' '; + } + if(prompt) { + // query = filter + prompt + ' "openapi: 3"'; + query = filter + prompt + ` "swagger: \\"2"`; } else { - return query; + // query = filter + ' "openapi: 3"'; + query = filter + ' "swagger: \\"2"'; } return query; }