# Setup

In [None]:
from google.colab import drive

drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
%cd gdrive/MyDrive/News+LPReports/News-Classifiers/summary

/content/gdrive/MyDrive/News+LPReports/News-Classifiers/summary


In [None]:
! nvidia-smi

Mon Jun 21 08:40:26 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.27       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   56C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!pip install transformers ipython-autotime sentencepiece

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/00/92/6153f4912b84ee1ab53ab45663d23e7cf3704161cb5ef18b0c07e207cef2/transformers-4.7.0-py3-none-any.whl (2.5MB)
[K     |████████████████████████████████| 2.5MB 33.0MB/s 
[?25hCollecting ipython-autotime
  Downloading https://files.pythonhosted.org/packages/b4/c9/b413a24f759641bc27ef98c144b590023c8038dfb8a3f09e713e9dff12c1/ipython_autotime-0.3.1-py2.py3-none-any.whl
Collecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/ac/aa/1437691b0c7c83086ebb79ce2da16e00bef024f24fec2a5161c35476f499/sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2MB)
[K     |████████████████████████████████| 1.2MB 39.5MB/s 
[?25hCollecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/d4/e2/df3543e8ffdab68f5acc73f613de9c2b155ac47f162e725dcac87c521c11/tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_

In [None]:
from transformers import (
    AdamW,
    T5ForConditionalGeneration,
    T5Tokenizer,
    get_linear_schedule_with_warmup
)
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import random
import numpy as np

def set_seed(seed):
  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)

set_seed(42)

# Few shot learning (training)
https://towardsdatascience.com/poor-mans-gpt-3-few-shot-text-generation-with-t5-transformer-51f1b01f843e

In [None]:
# choose which model you are using
model_name = 't5-base'

# ensure gpu is being utilized
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(device)

model = T5ForConditionalGeneration.from_pretrained(model_name)
model = model.to(device) # send model to gpu

tokenizer = T5Tokenizer.from_pretrained(model_name)

cuda:0


In [None]:
# optimizer
no_decay = ["bias", "LayerNorm.weight"]
optimizer_grouped_parameters = [
    {
        "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
        "weight_decay": 0.0,
    },
    {
        "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
        "weight_decay": 0.0,
    },
]
optimizer = AdamW(optimizer_grouped_parameters, lr=3e-4, eps=1e-8)

In [None]:
# dataset preparation

summary_tuples = [
                  ('summarize: Part of the relief act could be used to pay for laptops or tablets, and Internet connections for underprivileged families..On Friday last week, a teacher at Taipei Municipal Jianguo High School became an Internet sensation after he was found using 17Live, a livestreaming app previously known for pornographic content, to teach his students remotely..While that was creative, it also speaks to a lack of standards in place for distance learning..Increased relief funds are a positive development, but that should be accompanied by good planning and assistance to those who most need it â€” including consultations on how to transition work or learning to an online model.',
                   'A teacher became an Internet sensation after he was found using 17Live to teach his students remotely. This also shows the lack of standards for remote learning. Increased relief funds, accompanied by good planning and assistance will help transition learning to an online model.'),
                  ('summarize: That we achieved this in the middle of a pandemic is even more incredible, and makes us think about the possibilities as things return to normal.â€ As of this month, Mindset Ventures has invested in more than 50 companies in the US and Israel, with an overarching goal to help companies expand into Brazil and Latin America..Mindset Venturesâ€™ previous two funds have resulted in a variety of recent milestones for its portfolio companies including: Prodigy, an early exit in Fund III, was acquired by Upstart earlier this year Brex ($7.4 billion valuation) Voicea (acquired by Cisco) DiA partnered with Philips to enhance ultrasound with AI-based image quantification Atidot and Guardian partnered to create new insurance models and customer experience using AI and predictive analytics Turing placed among Fast Companyâ€™s 10 most innovative workplaces of 2021..These companies, and the rest of Mindset Venturesâ€™ portfolio companies develop innovative technologies that make inroads internationally.',
                   'As of this month, Mindset Ventures has invested in more than 50 companies in US and Israel to help them expand into Brazil and Latin Aamerica. Their portfolio companies have achieved variety of milestones: Prodigy was acquired by Upstart, Voicea acquired by Cisco, DiA partnered with Philips to enhance ultrasound and Atidot partnered with Guardian to create new insurance models and customer experience through AI and predictive analysis.'),
                  ('summarize: And no surprise, 55% also said the switch to remote work was the top driver of this increased complexity..Thatâ€™s according to a recent report from Axonius, a cybersecurity asset management solution provider..CFOs know these complexities brought on by remote work can lead to numerous problems â€“ from lack of visibility to security breaches to operational risk â€“ that can have negative financial impacts..(The survey confirmed this, with respondents expecting 40% of their workforce to continue working remotely post-pandemic.)',
                   'A recent report from Axonius show that 55% of people said to the switch to remote work was the top reason for increased complexity. CFOs understand that these complexities can lead to lack of visibility, security breaches and operational risks, causing negative financial impacts.'),
                  ('summarize: Bicycle Therapeutics plc (NASDAQ: BCYC), a biotechnology company pioneering a new and differentiated class of therapeutics based on its proprietary bicyclic peptide (BicycleÂ®) technology, today announced that management will participate in the following virtual investor conferences in June: Jefferies Virtual Healthcare Conference on Thursday, June 3, 2021; fireside chat at 11:00 a.m..ET Goldman Sachs 42 nd Annual Global Healthcare Conference on Tuesday, June 8, 2021; fireside chat at 8:00 a.m..ET The JMP Securities Life Sciences Conference on Thursday, June 17, 2021; fireside chat at 12:00 p.m..Archived replays of the webcasts will be available for 30 days following the presentation dates..About Bicycle Therapeutics Bicycle Therapeutics (NASDAQ: BCYC) is a clinical-stage biopharmaceutical company developing a novel class of medicines, referred to as BicyclesÂ®, for diseases that are underserved by existing therapeutics..Bicycles are fully synthetic short peptides constrained with small molecule scaffolds to form two loops that stabilize their structural geometry..This constraint facilitates target binding with high affinity and selectivity, making Bicycles attractive candidates for drug development..Bicycle is evaluating BT5528, a second-generation Bicycle Toxin Conjugate (BTC) targeting EphA2, and BT8009, a second-generation BTC targeting Nectin-4, a well-validated tumor antigen, in company-sponsored Phase I/II trials..In addition, BT1718, a BTC that targets MT1-MMP, is being investigated in an ongoing Phase I/IIa clinical trial sponsored by the Centre for Drug Development of Cancer Research UK..Bicycle is headquartered in Cambridge, UK with many key functions and members of its leadership team located in Lexington, MA..For more information, visit bicycletherapeutics.com..In the UK and Ireland, the gainers were led by pharmaceutical company Avadel Pharmaceuticals (AVDL) and mining company Rio Tinto Group (RIO), which rose 5.2% and 3.2% respectively..They were followed by mining company BHP Group (BBL) and biopharmaceutical company Akari Therapeutics (AKTX), which were up 3.3% and 2.7% The decliners in the UK and Ireland were led by biopharmaceutical companies Bicycle Therapeutics (BCYC) and MorphoSys (MOR), which lost 3.6% and 1.2% respectively..They were followed by pharmaceutical company GlaxoSmithKline (GSK) and biopharmaceutical company Mereo BioPharma Group (MREO), which were off 1% and 0.3%.',
                   'Bicycle Therapeutics is a biotechnology company pioneering a new class of therapeutics based on its (BicycleÂ®) technology. It has announced that it will be participating in various virtual conferences in June. Bicycle is an attractive candidate for drug development as the drugs have target binding with high affinity and selectivity. Bicycle is evaluating BT5528 and BT8009 targeting EphA2 and Nectin-4 respectively, in Phase I/II trials. In addition, BT1718 is being investigated in ongoing Phase I/IIa clinicial trial.'),
                  ('summarize: But now, the whole voting process has been shifted to Firstcryâ€™s app and website..Those who wish to vote for their favourite contestants can visit Firstcry and register their vote there..Hereâ€™s how you can vote for your favourite Indian Idol contestants and save them from elimination: Go to the Firstcry app (It can be downloaded from Google Play Store) or www.firstcry.com At the app or the website, login to Firstcry with your email id or Facebook account..Then, go to the search bar and type â€œIndian Idol votingâ€..However, Mensa Brands isnâ€™t the first such adoptor of Thrasio model in India..Startups such as Powerhouse 91 funded by Titan Capital, and Softbank-backed Firstcry also have made strides in this segment in India..Pune-based baby products marketplace Firstcry is currently one of the largest valued startups in the whitelabel D2C segment which is currently valued at over $2Bn..Firstcry had entered the unicorn club last year, after raising $296 Mn (INR 2,120 Cr) in a Series E round from Japan-based Softbankâ€™s Vision Fund in February 2020..India is currently seeing an uptick of smaller brands, with around 850K vendors selling on Amazon India..The brandâ€™s official website has seen major footfall over the last three months, and the net percentage of entire sales is approximately 35-40 percent..Ecommerce platforms such as Amazon, Firstcry, Flipkart and Nykaa comprise 50 percent of the sales, while retail markets, including baby hypermarkets in metro cities, contribute to 10 percent of incoming revenue..Diversifying the product portfolio According to Himanshu, the brandâ€™s ethos and approach lie in innovation and thorough R&D.',
                   "Indian Idol's whole voting process has been shifted to Firstcry's app and website. Startups such as Powerhouse 91 amd Firstcry have made strides in the pune-based baby products marketplace in India. Firstcry is one of the largest startups in the whitelabel D2C segment currently valued at over $2Bn. Firstcry entered the unicorn club last year after raising $296 Mn in Series E round. E-commerce platforms such as Amazon, Firstcry, Flipkart and Nykaa comprise 50% of sales."),
                  ("summarize: Inmagene Biopharmaceutical has wholly-owned subsidiaries in China and the United States, and has successfully raised about US $140 million..Qichensheng biology has completed several hundred million yuan of pre-A + round financing..With mRNA technology as the core, the company has established two core technology platforms of 'one body (mRNA) and two wings (cell, nano)'.On June 6, shangdao.com officially learned that Inmagene Biopharmaceutical recently announced the completion of US $100 million round C financing..The investor is jointly led by Honghui capital and its existing shareholder, Ruifu medical venture capital fund, new investors such as Qianhai Qinzhi, Yipu capital and dingpei asset management, as well as other existing shareholders such as Zhongnan venture capital and Kunlun capital..Three wheel drive Inmagene Biopharmaceutical is an innovative drug enterprise focusing on immune related diseases..The founding team has led the clinical development of more than 20 global heavyweight new drugs in their respective companies..It is reported that the pipeline of Inmagene Biopharmaceutical is 'three wheel drive'..Inmagene Biopharmaceutical (inmagene biopharmaceuticals) announced the completion of $100 million round C financing..People close to the regulatory authorities said that the regulatory authorities did not agree with the specific point of the forecast, and believed that the release of research views should be objective, professional and prudent, avoiding arbitrarinessï¼ˆ Securities Dealers (China).This round of financing is jointly led by highlight capital and its existing shareholders, panacea venture..Its most mature product img-020 will enter phase 2 or phase 3 clinical trials for multiple indications in the United States, China and Europe..The pipeline of Inmagene Biopharmaceutical is driven by 'three wheels'..First of all, Inmagene Biopharmaceutical will introduce overseas products suitable for China's national conditions into China, carry out global multi center clinical trials with overseas partners, and share the rights and interests of overseas markets on the basis of in-depth cooperation and collaborative development.",
                   "Inmagene Biopharmaceutical has wholly-owned subsidiaries in China and United States and has successfully raised about US$140 million. It recently announced the completion of US $100 million round C financing. The founding team has led the clinical development of more than 20 global heavyweight new drugs. Its most mature product img-020 will enter phase 2 or 3 clinical trials in the United States, China and Europe."),
                  ("summarize: TEL AVIV, Israel, June 2, 2021 /PRNewswire/ -- Innoviz Technologies (Nasdaq: INVZ), a technology leader of high-performance, solid-state LiDAR sensors and perception software, is scheduled to participate at the following virtual financial conferences during June 2021: Innoviz Technologies Logo (PRNewsfoto/Innoviz Technologies) 5th Annual Needham Virtual Automotive Tech Conference Presentation and 1x1s on Tuesday, June 8th at 8:00 a.m. Eastern time Webcast: Link UBS Global Industrials and Transportation Virtual Presentation and 1x1s on Wednesday, June 9th at 9:00 a.m. Eastern time Webcast: Link Deutsche Bank Global Auto Industry Conference Presentation and 1x1s on Wednesday, June 16th at 11:00 a.m. Eastern time Baird Autonomous Vehicle Webcast Series Fireside chat on Wednesday, June 16th at 12:00 PM Eastern time Goldman Sachs Inaugural Digital Economy Conference Fireside Chat and 1x1s on Friday, June 18th at 8:20 a.m. Eastern time Credit Suisse Mobility Start-up Forum 1x1s on Tuesday, June 22nd Israeli Investor 1x1 or Group Meeting with Management To schedule a meeting with CEO, Omer Keilaf and CFO, Eldar Cegla, please contact Maya Lustig: maya.lustig@innoviz-tech.com..About Innoviz Technologies Innoviz is a leading provider of technology that will put autonomous vehicles on roads..Innoviz's LiDAR technology can 'see' better than a human driver and meets the automotive industry's strict expectations for performance, safety and price..Selected by BMW for its fully autonomous car program, Innoviz's technology will be deployed in BMW's consumer vehicles..Innoviz is backed by top-tier strategic partners and investors, including SoftBank Ventures Asia, Samsung, Magna International, Aptiv, Magma Venture Partners, and others..For more information, visit www.innoviz.tech..Join the discussion: Facebook, LinkedIn, YouTube, Twitter Forward Looking Statements This announcement contains certain forward-looking statements within the meaning of the federal securities laws, including statements regarding the services offered by Innoviz, the anticipated technological capability of Innoviz's products, the markets in which Innoviz operates and Innoviz's projected revenue and other future financial and operational results..These forward-looking statements generally are identified by the words 'believe,' 'project,' 'expect,' 'anticipate,' 'estimate,' 'intend,' 'strategy,' 'future,'  and similar expressions..The foregoing list of factors is not exhaustive..You should carefully consider the foregoing factors and the other risks and uncertainties described in Innoviz's annual report on Form 20-F filed with the SEC on April 21, 2021 and other documents filed by Innoviz from time to time with the SEC..These filings identify and address other important risks and uncertainties that could cause actual events and results to differ materially from those contained in the forward-looking statements..Forward-looking statements speak only as of the date they are made..Readers are cautioned not to put undue reliance on forward-looking statements, and Innoviz assumes no obligation and does not intend to update or revise these forward-looking statements, whether as a result of new information, future events, or otherwise..Innoviz gives no assurance that it will achieve its expectations..Innoviz Technologies Ltd (NASDAQ: INVZ) shares fell -1.0370% to end trading Tuesday at $10.50 per share - a net change of $-0.11..Visit Innoviz Technologies Ltdâ€™s profile for more information..About The Nasdaq Stock Market The Nasdaq Stock Market is a global leader in trading data and services, and equities and options listing..Nasdaq is the world's leading exchange for options volume and is home to the five largest US companies - Apple, Microsoft, Amazon, Alphabet and Facebook..To get more information on Innoviz Technologies Ltd and to follow the companyâ€™s latest updates, you can visit the companyâ€™s profile page here: Innoviz Technologies Ltdâ€™s Profile..For more news on the financial markets be sure to visit Equities News..However, there are a couple of other lidar players that are likely to go public this year and this could give investors more investment options, potentially reducing demand for Luminar stock..For example, Innoviz Technologies, an Israel-based lidar company is likely to go public sometime in Q1 this year via a SPAC merger, while other lidar players AEye and Aeva are also likely to go public shortly taking a similar route..See our dashboard analysis Velodyne Vs. Luminar: Which Lidar Stock Should You Pick?.These companies are being joined by others that are jumping onto the bandwagon via SPAC mergers, which have opened a new avenue for tech firms to go public.",
                   "Innoviz Technologies is a leader of high-performance, solid-state LiDAR sensors and perception software, and is scheduled for virtual financial conferences during June 2021. Innoviz's LiDAR technology meets the automotive industry's strict expectations for performance, safety and price. It has been selected by BMW for its fully autonomous car program and is backed by top-tier strategic partners and investors like Softbank, Samsung, Magna, Aptiv and others."),
                  ("summarize: Cost performance index: 5-star professional index: 8-star suitable for people: business people with certain consumption ability..When you go shopping, you will meet LOHO's optical shop..The price is still appropriate, but the lens is not so good..In this forum, LOHO won the 2020 outstanding star show award of China shopping mall industry of China purchasing Federation by virtue of its own strength..The forum is sponsored by China Shopping Center Development Committee of Zoomlion..Therefore, the purpose of this forum is to gather with more than 1000 elites in the industry, such as commercial real estate developers, operators and retailers, to explore the 'pattern change & reshaping way'..The award-winning is also another high recognition of LOHO as the first new retail fashion glasses brand in China..LOHO is a leader and innovator in China's glasses industry..It will continue to promote the popularization and development of the fashion concept of glasses accessories in China, and strive to become the first new brand of fashion glasses in the minds of users..Glasses need more than one pair..At present, LOHO has more than 1000 brand stores nationwide, covering the core business districts of more than 150 cities, including Beijing, Shanghai, Guangzhou, Shenzhen, Nanjing, Chongqing, Chengdu and Changsha..Since its establishment in 2014, index capital has served hundreds of excellent enterprises at home and abroad, including Gaiya factory, Weimeng, boss direct employment, rongyun, Yilu software, yunxuetang, Deyi microelectronics, Yuanxin technology, Zhiyun health, zero krypton technology, peanut car, super orangutan, song Xiaocai, LOHO, Wanxue education, etc., centering on the three new economic channels of consumption, technology and medical treatment, Help them to complete the capital accumulation and occupy the competitive advantage..As for Jingwei China, Jingwei China is the most active VC organization in the early market.",
                   "LOHO won the 2020 outstanding star show award of China's shopping mall industry. The award-winning is also another high recognition of LOHO as the first new retail fashion glasses brand in China. It will continue to promote the popularization and development of the fashion concept of glasses accessories. Currently, LOHO has more than 1000 brand stores nationwide, covering the business districts of more than 150 cities.0"),
                  ("summarize: and to portray the many nakhras that meat lovers have..It is not a pleasant experience!.But thatâ€™s just who meat lovers are- their passion for great meat & seafood fuels their determination to source the choicest cuts and cook those delicious dishes from which they derive all the pleasure.The way India experiences meat underwent a transformation in 2015, when Licious , meat and seafood brand, came into existence..Licious started their journey with a belief that India deserves better meat & went on to learn all the nuances & nitty-gritties of what great meat & seafood stands for!.And, over the last 5 years they have perfected this knowledge to understand all the nakhras of the meat lover..Be it the nakhra of 150+ quality checks, the nakhra of immaculate packaging or the nakhra of an unbroken cold chain - it is Liciousâ€™ love for the most delectable meat & seafood that drives them to be obsessively fussy & extremely passionate!The newest brand campaign brings the idea of Meat Lover's Nakhras to life..In their biggest ever mass-media campaign Licious has worked with Bollywood actors Anil Kapoor and Arjun Kapoor to bring this core thought to life.Meghna Apparao, Chief Business Officer, Licious, said, â€œThere are meat eaters & then there are meat lovers..For meat lovers, great quality meat & seafood is lot more than just food; it is a thing of joy, it is a great time shared with family & friends over a meal, it is the immense satisfaction of creating a dish that is heart-warming..Meat lovers go to great lengths to ensure the meat they buy is perfect in every sense - fresh, hygienic, safe & free of antibiotics..Thatâ€™s where Licious comes in!.Over the last 5 years we have been working towards a deeper understanding of meat-lovers' nakhras and combining them with our own, in a continuous process of product development..We pride ourselves in our ability to devise and improve on our stringent quality control measures, animal rearing and handling best practices, so that our consumers get nothing but the best - every single time..Licious is a brand created by the meat lovers for the meat lovers..It is a delight to see Anil Kapoor & Arjun Kapoor showcasing their love for meat in a way that strikes a chord with us..After all, it is only one nakhrebaaj that understands another!â€Adding on, Apparao also mentioned, â€œThe role Licious plays is more pertinent now than ever..We have been working very hard since last year to serve our consumers the best of meat & seafood while keeping them safe & homebound..A good meal is great comfort- specially in times like these..The Licious promise in to ensure that we keep inspiring your inner cook & be an innate part of your culinary journey.â€Licious has worked with TILT to craft two light-hearted advertisement films with uncle-nephew duo Anil Kapoor & Arjun Kapoor to portray the many nakhras that meat lovers have..The film shows the duo bonding over cooking, sharing some fish-fry and tangdi kebab and overall having a great time..And just like with food, this experience was made even better with Arjun's company, so it was a win-win situation for me!.I am super impressed with all the good work that Licious is doing in disrupting the meat & seafood ecosystem of the country..It is truly heartening to see the impact of their transformative work & I'm happy to be a part of their story.â€Adding on to the above Arjun Kapoor commented, â€œI am delighted to be associated with the brand Licious..The high-octane, positive energy is palpable on the set..I had a great time collaborating with Anil chachu and I hope people will enjoy the ad film as much as we did shooting for the same.â€The campaign sees manifestation through TVCs & digital films other than its ramification across all brand assets.Targeted at national audience, the campaign goes LIVE on 3rd June 2021.Disclaimer- The films were shot before the state-wide lockdowns were announced..Licious acquired the necessary permissions & all COVID safety protocols were followed & maintained before, during & post shoot..ShackStream: Indie-licious takes a trip in An Airport for Aliens Currently Run by Dogs Aliens gotta travel too..Home delivery operations may be carried out through the back or side doors (without opening the main entrance) wherever possible..Grocery stores (both large format or supermarkets and local stores) shall be allowed to arrange for home delivery through pushcarts and vehicles-on-demand through phone or delivery apps like Dunzo, Supr Daily, Big Basket, Licious, Dunzo, Tendercuts etc..As per the Chief Ministerâ€™s statement, this will be allowed from 7 am to 6 pm..Locus helps its clients automate their logistics workload â€” tasks such as planning, organizing, transporting and tracking of inventories, and finding the best path to reach a destination â€” that have traditionally required intensive human labor, said Nishith Rastogi, CEO of Locus, in an interview with TechCrunch..â€œWhen you order from Licious or BigBasket, for instance, they need to decide each day at their centres how many vehicles they need to use, and what size of vehicles they need to go with,â€ Rastogi explained..These clients, he said, also need to assign drivers based on how familiar they are with the delivery area, and factor in the traffic to determine at what time they should leave for delivery..Here are links to some popular cicada recipes..Cicada cookies come from the 2004 cookbook â€œCicada-Licious: Cooking and Enjoying Periodical Cicadas.â€ Author Jenna Jadin said cicadas are a natural protein source..Spicy Popcorn Cicadas are soaked in Worcestershire sauce before coating it in an egg and flour mix and frying it..Our YouTube channel is home to a trove of exclusive content.",
                   "The way India experiences meat underwent a transformation in 2015 when Licious came into existence. Over the last 5 years, they have perfected the knowledge to understand all the nakhras of meat lovers. Their biggest media campaign saw them working with Bollywood actors to bring this thought to the public. Licious prides themselves in their ability to improve on quality control measures, animal rearing and handling best practices. The role of Licious is more petinent now to serve customers the best meat and seafood while keeping them safe and homebound."),
                  ("summarize: Bank Rakyat Indonesia Agroniaga (BRI Agro) has inked a deal with local agent-based fintech company Payfazz..Using the API (application programming interface) of its signature digital lending app Pinang, BRI Agroâ€™s digital savings and lending solutions will be channeled to 250,000-plus Payfazz agents as end-users..In turn, agents will also be able to relay the same offerings to their collective 10 million monthly active customers across rural Indonesia..In 2020, BRI Agroâ€™s fintech partnerships resulted in IDR215 billion (~US$14.8 million) worth of loans disbursed across third-party apps..â€œGig and service economies gained a stronger foothold in rural Indonesia due to accelerated digital adoption caused by the Covid-19 pandemic, but many of these microentrepreneurs still lack access to basic banking services that can help them grow their businesses, mitigate risks, and build healthy credit profiles..With Payfazz acting as yet another channel for our digital savings and micro-lending solutions, BRI Agro takes a giant step closer to becoming the financial home for Indonesiaâ€™s gig economy..We are especially excited to bring Pinang â€” with its flat 1.24% interest rate and two-minute loan approval process â€” to over 10 million non-urban Indonesians via Payfazz.â€ Hendra Kwik, Payfazzâ€™s CEO and Co-Founder said, â€œHere at Payfazz, we see banks and other fintech outfits not as competitors, but as potential partners..We concentrate the vast majority of our resources in rural areas..Bank Rakyat Indonesia Agroniaga (BRI Agro) has teamed up with local agent-based Fintech firm Payfazz..Using the API of its digital lending app Pinang, BRI Agroâ€™s online savings and lending solutions will be offered to 250,000+ Payfazz agents..Agents can provide the same service to their 10 million monthly active customers across rural Indonesia.",
                   "Bank rakyat Indonesia Agroniaga (BRI Agro) has signed a deal with fintech company Payfazz. BRI Agro's digital savings and lending solutions will be channeled to 250,000 Payfazz agents. In return, agents will relay the same offerings to their collective 10 million monthly active customers in Indonesia. This partnership resulted in US$14.8 million loans disbursed across third-party apps. Payfazz see banks and other fintech outfits as potential partners, and concentrate majority of resources in rural areas."),
                  ("summarize: 'Source: cinno ID: cinno_ Yin Zhanjiang, assistant to the president of Skyworth color TV company, said that Skyworth Mini Technology Industrial Park will be located in Dongxihu District of Wuhan on May 24, 2021 hot summer hard work economic and trade fair and Wuhan second quarter investment project signing conference..The launch of this generation of products will mark the maturity of the commercialization of mini LED backlight..At the supply chain end, since 2020, the upstream epitaxial chip factories represented by San'an, Huacan and Jingdian, the module solution factories such as Guoxing and Ruifeng, the downstream led terminal factories such as Lehman, the substrate factories represented by Vogel, as well as the panel factories such as BOE, Huaxing optoelectronics, vicino and Youda, the brand factories such as Sony, Samsung, Konka and TCL, and the overall solution suppliers such as PlayNitride and elux, They have invested a lot in R & D and production in this field..In 2020 alone, the investment in mini / micro LED related fields has reached as high as 24 items, with a total amount of 25.2 billion yuan..Xinruida plans to build a mini LED display project in Tianjin; More than $100 million!.On March 25, the application of Guangdong Zhongtu Semiconductor Technology Co., Ltd. (hereinafter referred to as 'Zhongtu technology') for listing on the science and technology innovation board has been accepted..PlayNitride plans to raise funds on a large scale to get ahead of mass production of micro LEDs..According to Taiwan media moneydj, Li Yunli, chairman of PlayNitride, recently pointed out that the mainland has a very active investment in micro LEDs, and PlayNitride will not invest less..In the past few years, PlayNitride has raised 80 million U.S. dollars, and then there is a wave of financing plans..The total amount of financing is expected to be 80 million U.S. dollars to 100 million U.S. dollars..According to PlayNitride, the main purpose is to establish a certain scale of its own production line, from long crystal / epitaxial, chips, to massive transfer, and even modules..It is estimated that the location of the new production line is still in Taiwan..It will take about one year to build the production line, and it will be put into production in the second half of next year at the earliest..After the completion of the new production line in the future, the company will have a certain scale of production capacity in each process, and there will be more flexibility in business..At present, the massive transfer technologies of various companies are not the same..PlayNitride claims that it can transfer 360000 micro LEDs at most at one time..If the yield after transfer is 99.5%, the yield after repair can reach 99.999% to 100%..However, the development of micro LED has attracted much attention..Future applications include industrial control, medical, home appliances and so on..Its graphical substrate products are mainly used for the internal use of San'an optoelectronics..Fujian Jing'an and the PlayNitridere in the first echelon of the industry with a large scale of production capacity..According to Chairman & CEO Dr. Liu Zhaojun, this round of financing will be mainly used to improve the micro led pilot line, expand the R & D team and improve the overall R & D level of the company..In 2018, the PlayNitridelso won the angel round investment of Saifu investment fund.",
                   "PlayNitride plans to raise funds to get ahead of mas production of micro LEDs. In the past few years, PlayNitride has raised 80 million USD, and there is a wave of financial plans expected to be about 80-100 million USD. The main purpose is to establish a certain scale of its own production line, and will be put into production in the second half of next year at the earliest. The development of micro LED has attracted much attention, with future applications like industrial control, medical and home appliances. ")
                                    
]

In [None]:
model.train()

epochs = 10

for epoch in range(epochs):
  print ("epoch ",epoch)
  for input,output in summary_tuples:
    input_sent = input+ " </s>"
    ouput_sent = output+" </s>"

    tokenized_inp = tokenizer.encode_plus(input_sent,  max_length=512, pad_to_max_length=True,return_tensors="pt")
    tokenized_output = tokenizer.encode_plus(ouput_sent, max_length=512, pad_to_max_length=True,return_tensors="pt")


    input_ids  = tokenized_inp["input_ids"]
    input_ids = input_ids.to(device) # ensure inputs and model on same device
    attention_mask = tokenized_inp["attention_mask"]
    attention_mask = attention_mask.to(device)

    lm_labels= tokenized_output["input_ids"]
    lm_labels = lm_labels.to(device)
    decoder_attention_mask=  tokenized_output["attention_mask"]
    decoder_attention_mask = decoder_attention_mask.to(device)


    # the forward function automatically creates the correct decoder_input_ids
    output = model(input_ids=input_ids, labels=lm_labels,decoder_attention_mask=decoder_attention_mask,attention_mask=attention_mask)
    loss = output[0]

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

epoch  0


  f"This sequence already has {self.eos_token}. In future versions this behavior may lead to duplicated eos tokens being added."


epoch  1
epoch  2
epoch  3
epoch  4
epoch  5
epoch  6
epoch  7
epoch  8
epoch  9


In [None]:
model.save_pretrained('model')

# Load trained model to run predictions

In [None]:
# choose which model you are using
model_name = 't5-base'

# ensure gpu is being utilized
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(device)

new_model = T5ForConditionalGeneration.from_pretrained('model')
new_model = new_model.to(device) # send model to gpu

tokenizer = T5Tokenizer.from_pretrained(model_name)

cuda:0


In [None]:
df = pd.read_csv('output/150621_after_summary_labelled.csv', index_col=0)
df.head(3)

Unnamed: 0,Search Query,article,URL,signal,summary,manual_summary
36,17Live,summarize: Part of the relief act could be use...,['https://www.taipeitimes.com/News/editorials/...,market,teacher used livestreaming app 17Live to teach...,A teacher became an Internet sensation after h...
0,Atidot,summarize: That we achieved this in the middle...,['https://finance.yahoo.com/news/mindset-ventu...,revenue,Mindset Ventures has invested in more than 50 ...,"As of this month, Mindset Ventures has investe..."
38,Axonius,"summarize: And no surprise, 55% also said the ...",['https://www.cfodailynews.com/news/it-complex...,market,55% also said the switch to remote work was th...,A recent report from Axonius show that 55% of ...


In [None]:
# https://huggingface.co/blog/how-to-generate

def summarize(text):
  input_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True, max_length=512)
  input_ids = input_ids.to(device) # ensure inputs and model on same device

  # generated_ids = model.generate(input_ids=input_ids, num_beams=2, max_length=max_length,  repetition_penalty=2.5, length_penalty=1.0, early_stopping=True)
  generated_ids = new_model.generate(input_ids=input_ids, 
                                 min_length=100, 
                                 max_length=300, 
                                 length_penalty=2.0, # set penalty for increasing length (> 1 means force to increase length)
                                 num_beams=4, # reduces the risk of missing hidden high probability word sequences, reduces repeats, makes words more surprising and less probable 
                                #  early_stopping=True,
                                #  no_repeat_ngram_size=4 # no 4-gram appears twice
                                 repetition_penalty = 2.0,
                                #  do_sample=True,
                                #  top_p=0.92,
                                #  top_k=0
                                 )
  preds = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True) for g in generated_ids]

  return preds[0]

In [None]:
%load_ext autotime
# display time for each cell execution

time: 110 µs (started: 2021-06-17 09:52:38 +00:00)


In [None]:
df['after'] = df['article'].apply(lambda x: summarize(x))

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


time: 3min 59s (started: 2021-06-17 09:53:04 +00:00)


In [None]:
df.to_csv('output/170621_tune.csv')

time: 390 ms (started: 2021-06-17 09:57:03 +00:00)


# PEGASUS for financial summarization
https://huggingface.co/human-centered-summarization/financial-summarization-pegasus

In [None]:
from transformers import PegasusTokenizer, PegasusForConditionalGeneration

In [None]:
# ensure gpu is being utilized
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(device)

# Let's load the model and the tokenizer 
model_name = "human-centered-summarization/financial-summarization-pegasus"

tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)
model = model.to(device) # send model to gpu

cuda:0


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1912529.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1341.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1436.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1274.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=2275419259.0, style=ProgressStyle(descr…




In [None]:
df = pd.read_csv('output/150621_after_summary_labelled.csv', index_col=0)
df.head(3)

Unnamed: 0,Search Query,article,URL,signal,summary,manual_summary
36,17Live,summarize: Part of the relief act could be use...,['https://www.taipeitimes.com/News/editorials/...,market,teacher used livestreaming app 17Live to teach...,A teacher became an Internet sensation after h...
0,Atidot,summarize: That we achieved this in the middle...,['https://finance.yahoo.com/news/mindset-ventu...,revenue,Mindset Ventures has invested in more than 50 ...,"As of this month, Mindset Ventures has investe..."
38,Axonius,"summarize: And no surprise, 55% also said the ...",['https://www.cfodailynews.com/news/it-complex...,market,55% also said the switch to remote work was th...,A recent report from Axonius show that 55% of ...


In [None]:
# https://huggingface.co/blog/how-to-generate

def summarize(text):
  input_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True, max_length=512)
  input_ids = input_ids.to(device) # ensure inputs and model on same device

  # generated_ids = model.generate(input_ids=input_ids, num_beams=2, max_length=max_length,  repetition_penalty=2.5, length_penalty=1.0, early_stopping=True)
  generated_ids = model.generate(input_ids=input_ids, 
                                 min_length=100, 
                                 max_length=300, 
                                 length_penalty=2.0, # set penalty for increasing length (> 1 means force to increase length)
                                 num_beams=4, # reduces the risk of missing hidden high probability word sequences, reduces repeats, makes words more surprising and less probable 
                                #  early_stopping=True,
                                #  no_repeat_ngram_size=4 # no 4-gram appears twice
                                 repetition_penalty = 2.0,
                                #  do_sample=True,
                                #  top_p=0.92,
                                #  top_k=0
                                 )
  preds = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True) for g in generated_ids]

  return preds[0]

In [None]:
%load_ext autotime
# display time for each cell execution

time: 77.5 µs (started: 2021-06-21 02:03:01 +00:00)


In [None]:
df['pegasus'] = df['article'].apply(lambda x: summarize(x))

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


time: 2min 48s (started: 2021-06-21 02:03:01 +00:00)


In [None]:
df.to_csv('output/180621_pegasus.csv')

time: 266 ms (started: 2021-06-21 02:05:50 +00:00)
